diff --git a/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Position.java b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Position.java
new file mode 100644
index 0000000..0f4fac5
--- /dev/null
+++ b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Position.java
@@ -0,0 +1,56 @@
+package org.nittalab.dtram.languageserver.model;
+
+/**
+ * {@link Position} class represents positions in text documents.
+ * <p>
+ * Positions are immutable and are compared by value.
+ *
+ * @author Shohei Yamagiwa
+ * @since 0.1
+ */
+public class Position {
+    private final int line;
+    private final int column;
+
+    /**
+     * Constructs a new position.
+     *
+     * @param line   The line of the position
+     * @param column The column of the position
+     */
+    public Position(int line, int column) {
+        this.line = line;
+        this.column = column;
+    }
+
+    public int getLine() {
+        return line;
+    }
+
+    public int getColumn() {
+        return column;
+    }
+
+    /**
+     * Compares positions by value so that {@link Token#equals(Object)} can compare tokens by their location.
+     *
+     * @param o The object to compare with
+     * @return {@code true} if {@code o} is an instance of the same class with the same line and column
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        Position other = (Position) o;
+        return line == other.line && column == other.column;
+    }
+
+    @Override
+    public int hashCode() {
+        return 31 * line + column;
+    }
+}
diff --git a/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Token.java b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Token.java
new file mode 100644
index 0000000..1d69bad
--- /dev/null
+++ b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Token.java
@@ -0,0 +1,121 @@
+package org.nittalab.dtram.languageserver.model;
+
+import java.util.Objects;
+
+/**
+ * {@link Token} class represents the part of all texts in model files.
+ *
+ * @author Shohei Yamagiwa
+ * @since 0.1
+ */
+public class Token {
+    /**
+     * A text of the token
+     */
+    private String text;
+
+    /**
+     * A position the token starts
+     */
+    private Position from;
+
+    /**
+     * A position the token ends
+     */
+    private Position to;
+
+    /**
+     * Indicates whether the token is the minimal block or not
+     */
+    private boolean atomic;
+
+    /**
+     * Constructs a new default token.
+     *
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    public Token() {
+        this.text = "";
+        this.from = new Position(0, 0);
+        this.to = new Position(0, 0);
+        this.atomic = false;
+    }
+
+    /**
+     * Constructs a new token with given properties.
+     *
+     * @param text   A text of the token
+     * @param from   A position the token starts
+     * @param to     A position the token ends
+     * @param atomic Whether the token is the minimal block or not
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    public Token(final String text, final Position from, final Position to, final boolean atomic) {
+        this.text = text;
+        this.from = from;
+        this.to = to;
+        this.atomic = atomic;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    public Position getPositionFrom() {
+        return from;
+    }
+
+    public void setPositionFrom(Position from) {
+        this.from = from;
+    }
+
+    public Position getPositionTo() {
+        return to;
+    }
+
+    public void setPositionTo(Position to) {
+        this.to = to;
+    }
+
+    public boolean isAtomic() {
+        return atomic;
+    }
+
+    public void setAtomic(boolean atomic) {
+        this.atomic = atomic;
+    }
+
+    @Override
+    public String toString() {
+        return text + " " + "starts from " + "line: " + from.getLine() + ", " + "column: " + from.getColumn() + " " + "to" + " " + "line: " + to.getLine() + ", " + "column: " + to.getColumn() + ", " + "isAtomic: " + atomic;
+    }
+
+    /**
+     * Compares tokens by text, start position, end position and atomicity.
+     *
+     * @param o The object to compare with
+     * @return {@code true} if {@code o} is a token of the same class whose properties are all equal
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        Token other = (Token) o;
+        return atomic == other.atomic && Objects.equals(text, other.text) && Objects.equals(from, other.from) && Objects.equals(to, other.to);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(text, from, to, atomic);
+    }
+}
diff --git a/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Tokens.java b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Tokens.java
new file mode 100644
index 0000000..89d6f02
--- /dev/null
+++ b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/model/Tokens.java
@@ -0,0 +1,70 @@
+package org.nittalab.dtram.languageserver.model;
+
+/**
+ * {@link Tokens} class holds all tokens used in DTRAM model text.
+ *
+ * @author Shohei Yamagiwa
+ * @since 0.1
+ */
+public class Tokens {
+    public static final String SPACE = " ";
+
+    public static final String CHANNEL = "channel";
+    public static final String INIT = "init";
+    public static final String IN = "in";
+    public static final String OUT = "out";
+    public static final String REF = "ref";
+    public static final String SUB_CHANNEL = "sub";
+    public static final String NATIVE = "native";
+
+    public static final String LEFT_CURLY_BRACKET = "{";
+    public static final String RIGHT_CURLY_BRACKET = "}";
+    public static final String LEFT_CURLY_BRACKET_REGX = "\\{";
+    public static final String RIGHT_CURLY_BRACKET_REGX = "\\}";
+    public static final String LEFT_BRACKET = "(";
+    public static final String RIGHT_BRACKET = ")";
+    public static final String LEFT_BRACKET_REGX = "\\(";
+    public static final String RIGHT_BRACKET_REGX = "\\)";
+    public static final String LEFT_SQUARE_BRACKET = "[";
+    public static final String RIGHT_SQUARE_BRACKET = "]";
+    public static final String LEFT_SQUARE_BRACKET_REGX = "\\[";
+    public static final String RIGHT_SQUARE_BRACKET_REGX = "\\]";
+
+    public static final String ADD = "+";
+    public static final String ADD_REGX = "\\+";
+    public static final String SUB = "-";
+    public static final String SUB_REGX = "\\-";
+    public static final String MUL = "*";
+    public static final String MUL_REGX = "\\*";
+    public static final String DIV = "/";
+    public static final String DIV_REGX = "/";
+    public static final String MOD = "%";
+
+    public static final String MINUS = "-";
+
+    public static final String EQ = "==";
+    public static final String NEQ = "!=";
+    public static final String GT = ">";
+    public static final String LT = "<";
+    public static final String GE = ">=";
+    public static final String LE = "<=";
+    public static final String AND = "&&";
+    public static final String OR = "||";
+    public static final String OR_REGX = "\\|\\|";
+    public static final String NEG = "!";
+
+    public static final String EQUALS = "=";
+    public static final String ASSIGNMENT = "=";
+    public static final String COMMA = ",";
+    public static final String COLON = ":";
+    public static final String DOT = ".";
+    public static final String DOT_REGX = "\\.";
+    public static final String DOUBLE_QUOT = "\"";
+
+    public static final String COMMENT = "//";
+    public static final String COMMENT_REGX = "\\/\\/";
+    public static final String MULTILINE_COMMENT_START = "/*";
+    public static final String MULTILINE_COMMENT_START_REGX = "\\/\\*";
+    public static final String MULTILINE_COMMENT_END = "*/";
+    public static final String MULTILINE_COMMENT_END_REGX = "\\*\\/";
+}
diff --git a/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java
new file mode 100644
index 0000000..9efaa8c
--- /dev/null
+++ b/LanguageServer/src/main/java/org/nittalab/dtram/languageserver/utils/Tokenizer.java
@@ -0,0 +1,226 @@
+package org.nittalab.dtram.languageserver.utils;
+
+import org.nittalab.dtram.languageserver.model.Position;
+import org.nittalab.dtram.languageserver.model.Token;
+import org.nittalab.dtram.languageserver.model.Tokens;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Objects;
+
+/**
+ * Tokenizes the model file
+ *
+ * @author Shohei Yamagiwa
+ * @since 0.1
+ */
+public final class Tokenizer {
+    /**
+     * The column given to the first character of each line.
+     */
+    private static final int FIRST_COLUMN = 1;
+
+    /**
+     * The {@link BufferedReader} that contains model text in memory.
+     */
+    private final BufferedReader reader;
+
+    /**
+     * Initializes {@link Tokenizer} with prepared {@link BufferedReader}
+     *
+     * @param reader Prepared model text in {@link BufferedReader}
+     * @throws NullPointerException Be thrown if {@code reader} is {@code null}
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    public Tokenizer(BufferedReader reader) {
+        this.reader = Objects.requireNonNull(reader, "reader");
+    }
+
+    /**
+     * Initializes {@link Tokenizer} with given {@code text}
+     *
+     * @param text The text from specific model file.
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    public Tokenizer(String text) {
+        this(new BufferedReader(new StringReader(text)));
+    }
+
+    /**
+     * Tokenizes all texts in {@link BufferedReader}
+     * <p>
+     * Lines and columns start from 1 and columns are counted in Unicode code points.
+     * Empty lines do not produce any token but still advance the line number.
+     *
+     * @return All tokens in {@link ArrayList}
+     * @throws IOException Be thrown if {@link BufferedReader} fails to read text lines
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    public ArrayList<Token> execute() throws IOException {
+        ArrayList<Token> allTokens = new ArrayList<>();
+        int line = 1;
+        String lineStr;
+
+        /* readLine() returns null when there are no more lines in the buffer */
+        while ((lineStr = reader.readLine()) != null) {
+            /* Adds whole line as the token unless the line is empty */
+            if (!lineStr.isEmpty()) {
+                int lineLength = lineStr.codePointCount(0, lineStr.length());
+                allTokens.add(new Token(lineStr, new Position(line, FIRST_COLUMN), new Position(line, FIRST_COLUMN + lineLength - 1), false));
+            }
+            line++;
+        }
+        allTokens = splitBySpace(allTokens);
+        allTokens = extractMultilineComments(allTokens);
+        allTokens = extractComments(allTokens);
+        return allTokens;
+    }
+
+    /**
+     * Splits tokens by half-width space.
+     * <p>
+     * Every half-width space becomes its own atomic {@link Tokens#SPACE} token, so that the columns of the following
+     * tokens stay correct even when several spaces are consecutive.
+     *
+     * @param original Original tokens
+     * @return {@link ArrayList} of {@link Token} extracted by splitting
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    private ArrayList<Token> splitBySpace(ArrayList<Token> original) {
+        ArrayList<Token> newTokens = new ArrayList<>();
+
+        for (Token originalToken : original) {
+            /* The limit -1 keeps trailing empty parts, so that trailing spaces are counted too */
+            String[] parts = originalToken.getText().split(Tokens.SPACE, -1);
+            int line = originalToken.getPositionFrom().getLine();
+            int column = originalToken.getPositionFrom().getColumn();
+            for (int i = 0; i < parts.length; i++) {
+                /* Exactly one space separates each part from the previous one */
+                if (i > 0) {
+                    newTokens.add(new Token(Tokens.SPACE, new Position(line, column), new Position(line, column), true));
+                    column++;
+                }
+
+                /* An empty part only marks consecutive, leading or trailing spaces */
+                String part = parts[i];
+                if (part.isEmpty()) {
+                    continue;
+                }
+
+                /* A part made only of other whitespace (e.g. tabs) keeps its own text and width */
+                int partLength = part.codePointCount(0, part.length());
+                newTokens.add(new Token(part, new Position(line, column), new Position(line, column + partLength - 1), part.isBlank()));
+                column += partLength;
+            }
+        }
+        return newTokens;
+    }
+
+    /**
+     * Extracts multiline comment from given tokens.
+     * <p>
+     * All tokens from {@link Tokens#MULTILINE_COMMENT_START} to the next {@link Tokens#MULTILINE_COMMENT_END} are merged
+     * into one atomic token. Line breaks are not kept in the text of the merged token. A comment that is never closed
+     * is merged up to the last token, and a comment end without a comment start is kept as an ordinary token.
+     *
+     * @param original Original tokens
+     * @return {@link ArrayList} of {@link Token} with extracted multiline comments
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    private ArrayList<Token> extractMultilineComments(ArrayList<Token> original) {
+        ArrayList<Token> newTokens = new ArrayList<>();
+
+        /* The comment being built, or null while outside of comments */
+        Token commentToken = null;
+        StringBuilder commentText = new StringBuilder();
+
+        for (Token originalToken : original) {
+            String text = originalToken.getText();
+            if (commentToken == null) {
+                if (Tokens.MULTILINE_COMMENT_START.equals(text)) {
+                    commentToken = new Token(text, originalToken.getPositionFrom(), originalToken.getPositionTo(), true);
+
+                    /* Starts from empty text so that the previous comment does not leak into this one */
+                    commentText.setLength(0);
+                    commentText.append(text);
+                } else {
+                    newTokens.add(originalToken);
+                }
+                continue;
+            }
+
+            commentText.append(text);
+            commentToken.setPositionTo(originalToken.getPositionTo());
+            if (Tokens.MULTILINE_COMMENT_END.equals(text)) {
+                commentToken.setText(commentText.toString());
+                newTokens.add(commentToken);
+                commentToken = null;
+            }
+        }
+
+        /* Keeps the text of the comment that is not closed instead of dropping it */
+        if (commentToken != null) {
+            commentToken.setText(commentText.toString());
+            newTokens.add(commentToken);
+        }
+        return newTokens;
+    }
+
+    /**
+     * Extracts single-line comment from given tokens.
+     * <p>
+     * All tokens from {@link Tokens#COMMENT} to the end of the same line are merged into one atomic token.
+     *
+     * @param original Original tokens
+     * @return New {@link ArrayList} of {@link Token} with extracted single-line comments
+     * @author Shohei Yamagiwa
+     * @since 0.1
+     */
+    private ArrayList<Token> extractComments(ArrayList<Token> original) {
+        ArrayList<Token> newTokens = new ArrayList<>();
+
+        /* The comment being built, or null while outside of comments */
+        Token commentToken = null;
+        StringBuilder commentText = new StringBuilder();
+        int commentLine = -1;
+
+        for (Token originalToken : original) {
+            if (commentToken != null) {
+                boolean onCommentLine = originalToken.getPositionFrom().getLine() == commentLine && originalToken.getPositionTo().getLine() == commentLine;
+                if (onCommentLine) {
+                    commentText.append(originalToken.getText());
+                    commentToken.setPositionTo(originalToken.getPositionTo());
+                    continue;
+                }
+
+                /* The line of the comment is over, and this token is handled as usual below */
+                commentToken.setText(commentText.toString());
+                newTokens.add(commentToken);
+                commentToken = null;
+            }
+
+            if (Tokens.COMMENT.equals(originalToken.getText())) {
+                commentToken = new Token(originalToken.getText(), originalToken.getPositionFrom(), originalToken.getPositionTo(), true);
+                commentLine = originalToken.getPositionFrom().getLine();
+                commentText.setLength(0);
+                commentText.append(originalToken.getText());
+            } else {
+                newTokens.add(originalToken);
+            }
+        }
+
+        /* Adds the comment that reaches the end of the text */
+        if (commentToken != null) {
+            commentToken.setText(commentText.toString());
+            newTokens.add(commentToken);
+        }
+        return newTokens;
+    }
+}