diff --git a/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java index 06ee260..4122fa8 100644 --- a/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java +++ b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java @@ -7,9 +7,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; -import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Arrays; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -19,7 +17,7 @@ * @author Shohei Yamagiwa * @since 0.1 */ -public final class Tokenizer { +public class Tokenizer { /** * The {@link BufferedReader} that contains model text in memory. */ @@ -55,7 +53,7 @@ * @author Shohei Yamagiwa * @since 0.1 */ - public ArrayList<Token> execute() throws IOException { + public final ArrayList<Token> execute() throws IOException { ArrayList<Token> allTokens = new ArrayList<>(); int line = 1; int column = 1; @@ -93,7 +91,7 @@ * @author Shohei Yamagiwa * @since 0.1 */ - private ArrayList<Token> splitBySpace(ArrayList<Token> original) { + protected final ArrayList<Token> splitBySpace(ArrayList<Token> original) { ArrayList<Token> newTokens = new ArrayList<>(); original.forEach(originalToken -> { @@ -135,7 +133,7 @@ * @author Shohei Yamagiwa * @since 0.1 */ - private ArrayList<Token> extractMultilineComments(ArrayList<Token> original) { + protected final ArrayList<Token> extractMultilineComments(ArrayList<Token> original) { ArrayList<Token> newTokens = new ArrayList<>(); Token commentToken = new Token(); @@ -181,7 +179,7 @@ * @author Shohei Yamagiwa * @since 0.1 */ - private ArrayList<Token> extractComments(ArrayList<Token> original) { + protected final ArrayList<Token> extractComments(ArrayList<Token> original) { ArrayList<Token> newTokens = new ArrayList<>(); int commentLine = -1; @@ -218,46 +216,77 @@ return newTokens; } - private ArrayList<Token> splitBySymbol(final ArrayList<Token> original, final String symbolStr, final String symbolRegex) { - final int symbolLength = symbolStr.codePointCount(0, symbolStr.length()); - final Pattern pattern = Pattern.compile(symbolRegex); // Compile regex to use in the loop. - - final ArrayList<Token> newTokens = new ArrayList<>(); + protected static ArrayList<Token> splitBySymbol(final ArrayList<Token> original, final String symbolStr, final String symbolRegex) { + Pattern pattern = Pattern.compile(symbolRegex); // Compile regex to use in the loop. + ArrayList<Token> newTokens = new ArrayList<>(); for (Token originalToken : original) { if (originalToken.isAtomic()) { // Token is atomic so no more splits are needed + newTokens.add(originalToken); continue; } /* Specified symbol isn't contained in the token */ Matcher matcher = pattern.matcher(originalToken.getText()); if (!matcher.find()) { + newTokens.add(originalToken); continue; } - // ( accounts ( accounts:List,signup(name:Str))) - boolean startsWithToken = originalToken.getText().startsWith(symbolStr); - boolean endsWithToken = originalToken.getText().endsWith(symbolStr); - ArrayDeque<String> tokens = new ArrayDeque<>(Arrays.asList(originalToken.getText().split(symbolRegex))); + /** + * [token0, token1] => token0( token1 + * ["", token0] => ""( token0 + * [token0, ""] => token0( "" + * [token0, token1, token2] => token0( token1( token2 + * [token0, "", token1] => token0( ""( token1 + * ["", token0, token1] => ( token0( token1 + * [token0, token1, ""] => token0( token1( "" + */ /* Split into several tokens */ + String[] tokens = originalToken.getText().split(symbolRegex, -1); ArrayList<Token> splitTokens = new ArrayList<>(); - Position start = originalToken.getStartPos(); - Position end = originalToken.getEndPos(); + Position lastTokenPos = originalToken.getStartPos(); - if (startsWithToken) { - Token symbol = new Token(symbolStr, start, start.move(0, symbolLength - 1), true); + for (int i = 0; i < tokens.length; i++) { + String tokenStr = tokens[i]; - String restStr = tokens.poll(); - int restLength = restStr.codePointCount(0, restStr.length()); -// Token rest = new Token(restStr, , false); + if (i == 0) { + if (tokenStr.isEmpty()) { + Token symbol = new Token(symbolStr, lastTokenPos, true); + splitTokens.add(symbol); + lastTokenPos = symbol.getEndPos(); + } else { + Token newToken = new Token(tokenStr, lastTokenPos, false); + splitTokens.add(newToken); + lastTokenPos = newToken.getEndPos(); + + Token symbol = new Token(symbolStr, lastTokenPos.move(0, 1), true); + splitTokens.add(symbol); + lastTokenPos = symbol.getEndPos(); + } + continue; + } + if (i == tokens.length - 1 && !tokenStr.isEmpty()) { + Token newToken = new Token(tokenStr, lastTokenPos.move(0, 1), false); + splitTokens.add(newToken); + break; + } + if (tokenStr.isEmpty()) { + Token symbol = new Token(symbolStr, lastTokenPos.move(0, 1), true); + splitTokens.add(symbol); + lastTokenPos = symbol.getEndPos(); + } else { + Token newToken = new Token(tokenStr, lastTokenPos.move(0, 1), false); + splitTokens.add(newToken); + lastTokenPos = newToken.getEndPos(); + + Token symbol = new Token(symbolStr, lastTokenPos.move(0, 1), true); + splitTokens.add(symbol); + lastTokenPos = symbol.getEndPos(); + } } - for (String token : tokens) { - // TODO: Implement this line - } - if (endsWithToken) { - // TODO: Implement this line - } + newTokens.addAll(splitTokens); } return newTokens; }