germax26
66cd54e532
I plan to make a few changes to my language, and I will then update these files later. I am moving them because I don't want them polluting the main directory. I probably should have done this to begin with, but better late than never.
874 lines
35 KiB
Plaintext
874 lines
35 KiB
Plaintext
# All reserved words of the language. keyword_from_str/keyword_to_str below
# must stay in sync with this list.
enum Keyword {
    Enum,
    Struct,
    Func,
    If,
    Else,
    While,
    Break,
    Continue,
    Do,
    For,
    To,
    In,
    Match,
    Case,
    Assert,
    Return,
    Lambda
}
|
|
|
|
# Option type for Keyword (the language appears to have no generic Optional,
# so each type gets its own Optional enum).
enum OptionalKeyword {
    Some(Keyword),
    None
}
|
|
|
|
# Classifies a lexed word: returns Some(keyword) when `keyword` is a reserved
# word, otherwise OptionalKeyword.None (the caller treats it as an identifier).
func keyword_from_str(keyword: str) -> OptionalKeyword {
    if keyword == "enum" return OptionalKeyword.Some(Keyword.Enum);
    if keyword == "struct" return OptionalKeyword.Some(Keyword.Struct);
    if keyword == "func" return OptionalKeyword.Some(Keyword.Func);
    if keyword == "if" return OptionalKeyword.Some(Keyword.If);
    if keyword == "else" return OptionalKeyword.Some(Keyword.Else);
    if keyword == "while" return OptionalKeyword.Some(Keyword.While);
    if keyword == "break" return OptionalKeyword.Some(Keyword.Break);
    if keyword == "continue" return OptionalKeyword.Some(Keyword.Continue);
    if keyword == "do" return OptionalKeyword.Some(Keyword.Do);
    if keyword == "for" return OptionalKeyword.Some(Keyword.For);
    if keyword == "to" return OptionalKeyword.Some(Keyword.To);
    if keyword == "in" return OptionalKeyword.Some(Keyword.In);
    if keyword == "match" return OptionalKeyword.Some(Keyword.Match);
    if keyword == "case" return OptionalKeyword.Some(Keyword.Case);
    if keyword == "assert" return OptionalKeyword.Some(Keyword.Assert);
    if keyword == "return" return OptionalKeyword.Some(Keyword.Return);
    if keyword == "lambda" return OptionalKeyword.Some(Keyword.Lambda);
    # Not a reserved word.
    return OptionalKeyword.None;
}
|
|
|
|
# Inverse of keyword_from_str: renders a Keyword back to its source spelling.
func keyword_to_str(keyword: Keyword) -> str {
    match keyword in {
        case Enum return "enum";
        case Struct return "struct";
        case Func return "func";
        case If return "if";
        case Else return "else";
        case While return "while";
        case Break return "break";
        case Continue return "continue";
        case Do return "do";
        case For return "for";
        case To return "to";
        case In return "in";
        case Match return "match";
        case Case return "case";
        case Assert return "assert";
        case Return return "return";
        case Lambda return "lambda";
    }
    # Unreachable: the match above covers every Keyword entry.
    assert false, "Invalid keyword";
}
|
|
|
|
# Every punctuation/operator token. The "D" prefix means "double" (Dequal is
# "==", Dpipe is "||", Dleft is "<<", etc.).
# NOTE(review): "Carot" is a misspelling of "Caret" ("^"); left as-is because
# renaming would break every use site.
enum Symbol {
    Open,
    Close,
    OpenCurly,
    CloseCurly,
    Comma,
    OpenSquare,
    CloseSquare,
    Colon,
    Left,
    Right,
    Arrow,
    Semicolon,
    Equal,
    Dequal,
    Exclamation,
    NotEqual,
    Dot,
    Plus,
    Dash,
    Asterisk,
    Dasterisk,
    Slash,
    QuestionMark,
    Ampersand,
    Dampersand,
    Pipe,
    Dpipe,
    Dleft,
    Dright,
    GreaterEqual,
    LesserEqual,
    Percent,
    Tilde,
    Carot
}
|
|
|
|
# Option type for Symbol; see OptionalKeyword.
enum OptionalSymbol {
    Some(Symbol),
    None
}
|
|
|
|
# Maps an operator/punctuation spelling to its Symbol, or OptionalSymbol.None
# when the text is not a known symbol.
# Fixed: previously this asserted on unknown input instead of returning
# OptionalSymbol.None, which (a) made the declared Optional return type
# meaningless and (b) made the caller's `case None` handler in
# lexer_next_token unreachable. Now mirrors keyword_from_str: the caller
# decides how to report an unknown symbol.
func symbol_from_str(symbol: str) -> OptionalSymbol {
    if symbol == "(" return OptionalSymbol.Some(Symbol.Open);
    if symbol == ")" return OptionalSymbol.Some(Symbol.Close);
    if symbol == "{" return OptionalSymbol.Some(Symbol.OpenCurly);
    if symbol == "}" return OptionalSymbol.Some(Symbol.CloseCurly);
    if symbol == "," return OptionalSymbol.Some(Symbol.Comma);
    if symbol == "[" return OptionalSymbol.Some(Symbol.OpenSquare);
    if symbol == "]" return OptionalSymbol.Some(Symbol.CloseSquare);
    if symbol == ":" return OptionalSymbol.Some(Symbol.Colon);
    if symbol == "<" return OptionalSymbol.Some(Symbol.Left);
    if symbol == ">" return OptionalSymbol.Some(Symbol.Right);
    if symbol == "->" return OptionalSymbol.Some(Symbol.Arrow);
    if symbol == ";" return OptionalSymbol.Some(Symbol.Semicolon);
    if symbol == "=" return OptionalSymbol.Some(Symbol.Equal);
    if symbol == "==" return OptionalSymbol.Some(Symbol.Dequal);
    if symbol == "!" return OptionalSymbol.Some(Symbol.Exclamation);
    if symbol == "!=" return OptionalSymbol.Some(Symbol.NotEqual);
    if symbol == "." return OptionalSymbol.Some(Symbol.Dot);
    if symbol == "+" return OptionalSymbol.Some(Symbol.Plus);
    if symbol == "-" return OptionalSymbol.Some(Symbol.Dash);
    if symbol == "*" return OptionalSymbol.Some(Symbol.Asterisk);
    if symbol == "**" return OptionalSymbol.Some(Symbol.Dasterisk);
    if symbol == "/" return OptionalSymbol.Some(Symbol.Slash);
    if symbol == "?" return OptionalSymbol.Some(Symbol.QuestionMark);
    if symbol == "&" return OptionalSymbol.Some(Symbol.Ampersand);
    if symbol == "&&" return OptionalSymbol.Some(Symbol.Dampersand);
    if symbol == "|" return OptionalSymbol.Some(Symbol.Pipe);
    if symbol == "||" return OptionalSymbol.Some(Symbol.Dpipe);
    if symbol == "<<" return OptionalSymbol.Some(Symbol.Dleft);
    if symbol == ">>" return OptionalSymbol.Some(Symbol.Dright);
    if symbol == ">=" return OptionalSymbol.Some(Symbol.GreaterEqual);
    if symbol == "<=" return OptionalSymbol.Some(Symbol.LesserEqual);
    if symbol == "%" return OptionalSymbol.Some(Symbol.Percent);
    if symbol == "~" return OptionalSymbol.Some(Symbol.Tilde);
    if symbol == "^" return OptionalSymbol.Some(Symbol.Carot);
    # Unknown symbol text: report None instead of crashing here.
    return OptionalSymbol.None;
}
|
|
|
|
# Inverse of symbol_from_str: renders a Symbol back to its source spelling.
func symbol_to_str(symbol: Symbol) -> str {
    match symbol in {
        case Open return "(";
        case Close return ")";
        case OpenCurly return "{";
        case CloseCurly return "}";
        case Comma return ",";
        case OpenSquare return "[";
        case CloseSquare return "]";
        case Colon return ":";
        case Left return "<";
        case Right return ">";
        case Arrow return "->";
        case Semicolon return ";";
        case Equal return "=";
        case Dequal return "==";
        case Exclamation return "!";
        case NotEqual return "!=";
        case Dot return ".";
        case Plus return "+";
        case Dash return "-";
        case Asterisk return "*";
        case Dasterisk return "**";
        case Slash return "/";
        case QuestionMark return "?";
        case Ampersand return "&";
        case Dampersand return "&&";
        case Pipe return "|";
        case Dpipe return "||";
        case Dleft return "<<";
        case Dright return ">>";
        case GreaterEqual return ">=";
        case LesserEqual return "<=";
        case Percent return "%";
        case Tilde return "~";
        case Carot return "^";
    }
    # Unreachable: the match above covers every Symbol entry.
    assert false, "Invalid symbol";
}
|
|
|
|
# The payload of a lexed token: a keyword, identifier, integer literal,
# string literal, punctuation symbol, or end-of-file marker.
enum TokenContents {
    Keyword(Keyword),
    Identifier(str),
    Number(int),
    String(str),
    Symbol(Symbol),
    Eof
}
|
|
|
|
# Debug rendering of TokenContents, e.g. Keyword(if), Number(42),
# String("hi"), Symbol('=='), Eof.
# Fixed: added the trailing `assert false` guard after the match, matching
# keyword_to_str and symbol_to_str — without it, control could fall off the
# end of a str-returning function if the match ever failed to return.
func token_contents_to_str(token: TokenContents) -> str {
    match token in {
        case Keyword(keyword) return "Keyword(%s)" % keyword_to_str(keyword);
        case Identifier(string) return "Identifier(%s)" % string;
        case Number(number) return "Number(%d)" % number;
        case String(string) return "String(\"%s\")" % string;
        case Symbol(symbol) return "Symbol('%s')" % symbol_to_str(symbol);
        case Eof return "Eof";
    }
    # Unreachable: the match above covers every TokenContents entry.
    assert false, "Invalid token contents";
}
|
|
|
|
# A lexed token: its 1-based source line, column, the exact source text it
# was built from (`value`), and its classified contents.
# NOTE(review): `col` appears to stay 0 — the lexer resets it on newlines but
# never increments it; see lexer_next_token.
struct Token {
    line: int,
    col: int,
    value: str,
    contents: TokenContents
}
|
|
|
|
# Debug rendering of a Token with its source position,
# e.g. Symbol('==')&#123;:3:7&#125;. `int_to_str` is defined elsewhere.
func token_to_str(token: Token) -> str {
    return token_contents_to_str(token.contents)+"{:"+int_to_str(token.line)+":"+int_to_str(token.col)+"}";
}
|
|
|
|
# Option type for Token; see OptionalKeyword.
enum OptionalToken {
    Some(Token),
    None
}
|
|
|
|
# True when the OptionalToken carries a token. Used by the parser's
# take/check helpers.
func is_some_token(maybe_token: OptionalToken) -> bool {
    match maybe_token in {
        case Some(_) return true;
        case None return false;
    }
    # Unreachable: both variants handled above.
    assert false, "Unreachable";
}
|
|
|
|
# Lexer state: the full source text, the current character offset into it,
# the current line/column, and a one-token lookahead cache filled by
# lexer_peek_token and drained by lexer_next_token.
struct Lexer {
    source: str,
    location: int,
    line: int,
    col: int,
    peeked_token: OptionalToken
}
|
|
|
|
# Creates a lexer positioned at the start of `source` (line numbering starts
# at 1, columns at 0) with an empty lookahead cache.
func new_lexer(source: str) -> Lexer {
    return Lexer{
        source = source,
        location = 0,
        line = 1,
        col = 0,
        peeked_token = OptionalToken.None
    };
}
|
|
|
|
# Convenience constructor: lex the contents of the file at `path`
# (`read` is defined elsewhere).
func lexer_from_file(path: str) -> Lexer return new_lexer(read(path));
|
|
|
|
# True for the whitespace characters the lexer skips: space, tab, newline.
# NOTE(review): "\r" is not treated as whitespace, so CRLF sources would
# reach the unknown-symbol path — TODO confirm whether "\r" should be added.
func is_space(char: str) -> bool {
    return char == " " || char == "\t" || char == "\n";
}
|
|
|
|
# True when `char` is an ASCII decimal digit (exhaustive comparison — the
# language has no visible char-range or ordinal facility in this file).
func is_digit(char: str) -> bool {
    return char == "0" || char == "1" || char == "2" || char == "3" || char == "4" || char == "5" || char == "6" || char == "7" || char == "8" || char == "9";
}
|
|
|
|
# True when `char` may appear in an identifier/keyword: ASCII letter or
# underscore. Note digits are NOT included (see lexer_next_token).
func is_alpha(char: str) -> bool {
    return char == "a" || char == "b" || char == "c" || char == "d" || char == "e" || char == "f" || char == "g" || char == "h" || char == "i" || char == "j" || char == "k" || char == "l" || char == "m" || char == "n" || char == "o" || char == "p" || char == "q" || char == "r" || char == "s" || char == "t" || char == "u" || char == "v" || char == "w" || char == "x" || char == "y" || char == "z" || char == "A" || char == "B" || char == "C" || char == "D" || char == "E" || char == "F" || char == "G" || char == "H" || char == "I" || char == "J" || char == "K" || char == "L" || char == "M" || char == "N" || char == "O" || char == "P" || char == "Q" || char == "R" || char == "S" || char == "T" || char == "U" || char == "V" || char == "W" || char == "X" || char == "Y" || char == "Z" || char == "_";
}
|
|
|
|
# Consumes and returns the next token. If a token was previously peeked
# (lexer_peek_token), it is returned and the cache cleared; otherwise
# whitespace is skipped and the next number / word / string / symbol is
# scanned. Returns an Eof token once the source is exhausted.
# Fixed: the unimplemented-symbol assert used `False` (capital F) — every
# other assert in this file uses the `false` literal, so `False` would be an
# undefined name on that error path.
# NOTE(review): lexer.col is reset to 0 on newlines but never incremented as
# characters are consumed, so every token reports col 0 — TODO fix column
# tracking.
func lexer_next_token(lexer: Lexer) -> Token {
    # Serve (and clear) the one-token lookahead cache first.
    match lexer.peeked_token in {
        case Some(token) do {
            lexer.peeked_token = OptionalToken.None;
            return token;
        }
    }

    # Skip whitespace, updating the line counter on newlines.
    while lexer.location < len(lexer.source) && is_space(lexer.source[lexer.location]) do {
        if lexer.source[lexer.location] == "\n" do {
            lexer.line = lexer.line + 1;
            lexer.col = 0;
        }
        lexer.location = lexer.location + 1;
    }

    if lexer.location >= len(lexer.source) return Token{line=lexer.line, col=lexer.col, value="\0", contents=TokenContents.Eof};

    if is_digit(lexer.source[lexer.location]) do {
        # Integer literal: greedily consume digits.
        number_str: str = "";
        while lexer.location < len(lexer.source) && is_digit(lexer.source[lexer.location]) do {
            number_str = number_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        number: int = str_to_int(number_str);
        return Token{line=lexer.line, col=lexer.col, value=number_str, contents=TokenContents.Number(number)};
    } else if is_alpha(lexer.source[lexer.location]) do {
        # Word: classified as keyword or identifier.
        # NOTE(review): is_alpha excludes digits, so `foo1` lexes as
        # identifier `foo` followed by number `1` — TODO confirm intended.
        word_str: str = "";
        while lexer.location < len(lexer.source) && is_alpha(lexer.source[lexer.location]) do {
            word_str = word_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        match keyword_from_str(word_str) in {
            case Some(keyword) return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Keyword(keyword)};
            case None return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Identifier(word_str)};
        }
        assert false, "Identifier";
    } else if lexer.source[lexer.location] == "\"" do {
        # String literal: consume until an unescaped closing quote. The raw
        # (still-escaped) body is stored; `value` keeps the surrounding quotes.
        lexer.location = lexer.location + 1;
        string_str: str = "";
        escaping: bool = false;
        while lexer.location < len(lexer.source) && (lexer.source[lexer.location] != "\"" || escaping) do {
            escaping = escaping? false: lexer.source[lexer.location] == "\\";
            string_str = string_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        lexer.location = lexer.location + 1;
        return Token{line=lexer.line, col=lexer.col, value="\""+string_str+"\"", contents=TokenContents.String(string_str)};
    } else if lexer.source[lexer.location] == "|" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "|" do {
        # Two-character symbols are matched explicitly before the
        # single-character fallback below.
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="||", contents=TokenContents.Symbol(Symbol.Dpipe)};
    } else if lexer.source[lexer.location] == "&" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "&" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="&&", contents=TokenContents.Symbol(Symbol.Dampersand)};
    } else if lexer.source[lexer.location] == "*" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "*" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="**", contents=TokenContents.Symbol(Symbol.Dasterisk)};
    } else if lexer.source[lexer.location] == "-" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="->", contents=TokenContents.Symbol(Symbol.Arrow)};
    } else if lexer.source[lexer.location] == ">" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value=">=", contents=TokenContents.Symbol(Symbol.GreaterEqual)};
    } else if lexer.source[lexer.location] == "<" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="<=", contents=TokenContents.Symbol(Symbol.LesserEqual)};
    } else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="==", contents=TokenContents.Symbol(Symbol.Dequal)};
    } else if lexer.source[lexer.location] == "!" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="!=", contents=TokenContents.Symbol(Symbol.NotEqual)};
    } else {
        # NOTE(review): "<<" and ">>" (Dleft/Dright) have no two-character
        # branch above, so they lex as two separate "<"/">" tokens here —
        # TODO confirm whether that is intentional.
        match symbol_from_str(lexer.source[lexer.location]) in {
            case Some(symbol) do {
                lexer.location = lexer.location + 1;
                return Token{line=lexer.line, col=lexer.col, value=lexer.source[lexer.location-1], contents=TokenContents.Symbol(symbol)};
            }
            # Bug fix: was `assert False` (capital F).
            case None assert false, "Unimplemented, '%s'" % lexer.source[lexer.location];
        }
        assert false, "Unreachable Symbol";
    }

    # Unreachable: every branch above returns or asserts.
    assert false, "Unreachable, next_token";
}
|
|
|
|
# Returns the next token WITHOUT consuming it. The first peek scans a token
# and caches it in lexer.peeked_token; repeated peeks return the same cached
# token until lexer_next_token drains it.
func lexer_peek_token(lexer: Lexer) -> Token {
    match lexer.peeked_token in {
        case Some(token) return token;
        case None do {
            token: Token = lexer_next_token(lexer);
            lexer.peeked_token = OptionalToken.Some(token);
            return token;
        }
    }
    # Unreachable: both variants handled above.
    assert false, "Unreachable";
}
|
|
|
|
# True when the next token's contents equal `expected`. Peeks only — nothing
# is consumed.
func lexer_check_token(lexer: Lexer, expected: TokenContents) -> bool {
    token: Token = lexer_peek_token(lexer);
    return token.contents == expected;
}
|
|
|
|
# Consumes and returns the next token if its contents match `token`;
# otherwise consumes nothing and returns OptionalToken.None.
func lexer_take_token(lexer: Lexer, token: TokenContents) -> OptionalToken {
    if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
    return OptionalToken.None;
}
|
|
|
|
# Consumes and returns the next token if it matches ANY of the expected
# contents; otherwise consumes nothing and returns OptionalToken.None.
# Now delegates to lexer_take_token so the check-then-consume logic lives in
# exactly one place (behavior unchanged).
func lexer_take_tokens(lexer: Lexer, tokens: TokenContents[]) -> OptionalToken {
    for token in tokens do {
        maybe_token: OptionalToken = lexer_take_token(lexer, token);
        if is_some_token(maybe_token) return maybe_token;
    }
    return OptionalToken.None;
}
|
|
|
|
# Consumes the next token and asserts its contents equal `expected`,
# reporting both the expectation and the offending token on failure.
func lexer_assert_token(lexer: Lexer, expected: TokenContents) -> Token {
    token: Token = lexer_next_token(lexer);
    assert token.contents == expected, "Expected %s but got %s!" % (token_contents_to_str(expected), token_to_str(token));
    return token;
}
|
|
|
|
# True when the next token matches ANY of `tokens`. Peeks only — nothing is
# consumed.
func lexer_check_tokens(lexer: Lexer, tokens: TokenContents[]) -> bool {
    for token in tokens if lexer_check_token(lexer, token) return true;
    return false;
}
|
|
|
|
# AST for type annotations:
#   Tuple            - (A, B, ...); an empty tuple is ()
#   Union            - NOTE(review): never constructed in the visible code
#   List             - T[]
#   Array            - T[n] with a fixed integer size
#   Name             - a plain identifier such as `int`
#   Specification    - generic application, e.g. Name<T, U>
#   Function         - (A, B) -> R
enum TypeExpression {
    Tuple(TypeExpression[]),
    Union(TypeExpression[]),
    List(TypeExpression),
    Array(TypeExpression, int),
    Name(str),
    Specification(TypeExpression, TypeExpression[]),
    Function(TypeExpression[], TypeExpression)
}
|
|
|
|
# Option type for TypeExpression; see OptionalKeyword.
enum OptionalTypeExpression {
    Some(TypeExpression),
    None
}
|
|
|
|
# Parses a non-function type: a parenthesised tuple, a plain name, and any
# number of postfix suffixes — `[]` (list), `[n]` (fixed array), or
# `[...]`/`<...>` generic specification. Function arrows are handled by
# parse_type, which calls this first.
# Fixed: the fixed-size array branch only PEEKED at the size (Number) token;
# the following lexer_assert_token(CloseSquare) then consumed that cached
# Number token and the assertion failed for every `T[n]`. The size token is
# now consumed before asserting the closing `]`.
func parse_type_primary(lexer: Lexer) -> TypeExpression {
    base_type: TypeExpression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        # "()" is the empty tuple type.
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]);

        types: TypeExpression[] = [parse_type(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) types = types + [parse_type(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        base_type = TypeExpression.Tuple(types);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        assert false, "Unimplemented parse_type_primary array";
    } else {
        base_type = TypeExpression.Name(parse_identifier(lexer));
    }

    # Postfix suffix loop: `[`/`<` starts a list, fixed array, or generic
    # specification on the type built so far.
    closing: Symbol;
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                match symbol in {
                    case OpenSquare match lexer_peek_token(lexer).contents in {
                        case Number(number) do {
                            # Bug fix: consume the peeked size token before
                            # asserting the closing bracket.
                            lexer_next_token(lexer);
                            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                            base_type = TypeExpression.Array(base_type, number);
                            continue;
                        }
                    }
                }

                # Pick the matching close bracket for this opener.
                match symbol in {
                    case OpenSquare closing = Symbol.CloseSquare;
                    case Left closing = Symbol.Right;
                    case _ assert false, "Unreachable";
                }

                # "[]" with nothing inside is the list suffix.
                match symbol in {
                    case OpenSquare if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closing))) do {
                        base_type = TypeExpression.List(base_type);
                        continue;
                    }
                }

                # Otherwise this is a generic specification: parse the
                # comma-separated type arguments up to the closing bracket.
                generics: TypeExpression[] = [parse_type(lexer)];
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)];

                lexer_assert_token(lexer, TokenContents.Symbol(closing));
                match base_type in {
                    case Specification assert false, "Cannot specify an already specified type";
                }

                base_type = TypeExpression.Specification(base_type, generics);
            }
            case _ assert false, "Unreachable";
        }
    }

    return base_type;
}
|
|
|
|
# Parses a full type. Function types are right-associative: after a primary
# type, `-> R` builds Function(params, R), where a tuple primary supplies the
# parameter list and any other primary becomes a single-parameter function.
func parse_type(lexer: Lexer) -> TypeExpression {
    base_type: TypeExpression = parse_type_primary(lexer);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return base_type;
    # Recursing here makes `A -> B -> C` parse as A -> (B -> C).
    return_type: TypeExpression = parse_type(lexer);
    match base_type in {
        case Tuple(type_expressions) return TypeExpression.Function(type_expressions, return_type);
    }
    return TypeExpression.Function([base_type], return_type);
}
|
|
|
|
# A `name: type` pair, used for struct fields and function parameters.
# (`type_` avoids clashing with a keyword-like name.)
struct TypeDeclaration {
    name: str,
    type_: TypeExpression
}
|
|
|
|
# Parses `identifier : type` into a TypeDeclaration.
func parse_type_declaration(lexer: Lexer) -> TypeDeclaration {
    entry_name: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    entry_type: TypeExpression = parse_type(lexer);
    return TypeDeclaration{name=entry_name, type_=entry_type};
}
|
|
|
|
# One variant of an `enum` declaration:
#   Const  - bare name, e.g. `None`
#   Tuple  - name with positional payload types, e.g. `Some(Token)`
#   Struct - name with named fields (parsing currently unimplemented)
enum EnumEntry {
    Const(str),
    Tuple(str, TypeExpression[]),
    Struct(str, TypeDeclaration[])
}
|
|
|
|
# Parses a single enum variant: `Name`, `Name(T, ...)`, or `Name{...}`
# (struct-style variants are only parsed when empty; non-empty ones assert).
func parse_enum_entry(lexer: Lexer) -> EnumEntry {
    entry_name: str = parse_identifier(lexer);
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        # Tuple variant: comma-separated payload types up to ")".
        entry_types: TypeExpression[] = [parse_type(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) entry_types = entry_types + [parse_type(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        return EnumEntry.Tuple(entry_name, entry_types);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return EnumEntry.Struct(entry_name, []);

        assert false, "Unimplemented parse_enum_entry";
    }

    return EnumEntry.Const(entry_name);
}
|
|
|
|
# Expression AST. Binary variants each carry (left, right); the precedence of
# the binary operators is defined by the `precedences` table below.
# NOTE(review): `LoopComrehension` is a misspelling of "LoopComprehension";
# left as-is because renaming would break every use site.
enum Expression {
    FunctionCall(Expression, Expression[]),
    Variable(str),
    ArrayAccess(Expression, Expression),
    Array(Expression[]),
    FieldAccess(Expression, str),
    Number(int),
    String(str),
    Tuple(Expression[]),
    StructInstantiation(Expression, (str, Expression)[]),
    LoopComrehension(Expression, str, Expression),
    Return(Expression),
    Ternary(Expression, Expression, Expression),
    Or(Expression, Expression),
    And(Expression, Expression),
    Bor(Expression, Expression),
    Bxor(Expression, Expression),
    Band(Expression, Expression),
    Equal(Expression, Expression),
    NotEqual(Expression, Expression),
    LessThan(Expression, Expression),
    GreaterThan(Expression, Expression),
    LessThanOrEqual(Expression, Expression),
    GreaterThanOrEqual(Expression, Expression),
    ShiftLeft(Expression, Expression),
    ShiftRight(Expression, Expression),
    Addition(Expression, Expression),
    Subtract(Expression, Expression),
    Multiplication(Expression, Expression),
    Division(Expression, Expression),
    Modulo(Expression, Expression),
    Bnot(Expression),
    Not(Expression),
    UnaryPlus(Expression),
    UnaryMinus(Expression)
}
|
|
|
|
# Option type for Expression; see OptionalKeyword.
enum OptionalExpression {
    Some(Expression),
    None
}
|
|
|
|
# Parses one `field = expression` pair inside a struct instantiation
# (e.g. Token{line=..., col=...}).
func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
    parameter: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
    return (parameter, parse_expression(lexer));
}
|
|
|
|
# Parses a primary expression — a parenthesised expression/tuple, an array
# literal or loop comprehension, or a literal/identifier — followed by any
# number of postfix operations: `.field`, `(args)`, `[index]`, `{fields}`.
# Fixed two error-path bugs:
#   1. The "expected identifier" case bound the token's CONTENTS but passed
#      it to token_to_str, which takes a whole Token; it now uses
#      token_contents_to_str (compare parse_identifier, which correctly
#      passes the Token).
#   2. The postfix default case called lexer_next_token a second time just to
#      build the message, consuming an extra token and reporting the WRONG
#      token; it now binds and reports the contents already consumed.
func parse_primary(lexer: Lexer) -> Expression {
    base_expression: Expression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        # "()" is the empty tuple.
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.Tuple([]);
        else {
            elements: Expression[] = [parse_expression(lexer)];
            # A trailing comma ("(x,)") forces a singleton tuple.
            singleton: bool = false;
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
                if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
                    singleton = true;
                    break;
                }
                elements = elements + [parse_expression(lexer)];
            }
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
            # One element without a trailing comma is just a parenthesised
            # expression, not a tuple.
            base_expression = singleton || len(elements) > 1? Expression.Tuple(elements): elements[0];
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare))) base_expression = Expression.Array([]);
        else {
            expressions: Expression[] = [parse_expression(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                # "[expr for name in iterable]" loop comprehension.
                variable: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                expression: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.LoopComrehension(expressions[0], variable, expression);
            } else {
                # Plain array literal.
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) expressions = expressions + [parse_expression(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.Array(expressions);
            }
        }
    } else {
        match lexer_next_token(lexer).contents in {
            case String(string) base_expression = Expression.String(string);
            case Number(number) base_expression = Expression.Number(number);
            case Identifier(string) base_expression = Expression.Variable(string);
            # Bug fix: _token is TokenContents, so render it with
            # token_contents_to_str (token_to_str takes a Token).
            case _token assert false, "Expected identifier, but got %s!" % token_contents_to_str(_token);
        }
    }

    # Postfix loop: field access, call, indexing, struct instantiation.
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) match symbol in {
                case Dot base_expression = Expression.FieldAccess(base_expression, parse_identifier(lexer));
                case Open do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.FunctionCall(base_expression, []);
                    else {
                        arguments: Expression[] = [parse_expression(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) arguments = arguments + [parse_expression(lexer)];

                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
                        base_expression = Expression.FunctionCall(base_expression, arguments);
                    }
                }
                case OpenSquare do {
                    index: Expression = parse_expression(lexer);
                    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                    base_expression = Expression.ArrayAccess(base_expression, index);
                }
                case OpenCurly do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) base_expression = Expression.StructInstantiation(base_expression, []);
                    else {
                        struct_arguments: (str, Expression)[] = [parse_struct_argument(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_arguments = struct_arguments + [parse_struct_argument(lexer)];

                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                        base_expression = Expression.StructInstantiation(base_expression, struct_arguments);
                    }
                }
                case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(symbol);
            }
            # Bug fix: report the token already consumed by the match above
            # instead of consuming (and mis-reporting) a second token.
            case _token assert false, "Unimplemented parse_primary %s" % token_contents_to_str(_token);
        }
    }

    return base_expression;
}
|
|
|
|
# Parses prefix operators (~ ! + - return), each binding tighter than any
# binary operator; recursion makes them right-associative (e.g. "!!x").
func parse_unary(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde))) return Expression.Bnot(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation))) return Expression.Not(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus))) return Expression.UnaryPlus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash))) return Expression.UnaryMinus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_unary(lexer));
    return parse_primary(lexer);
}
|
|
|
|
# Binary-operator precedence table, lowest-binding level first. Each level is
# a list of (symbol, AST-constructor) pairs; parse_expression_at_level walks
# the outer list by index, so the order here IS the precedence order:
# || < && < | < ^ < & < (== !=) < (< > <= >=) < (<< >>) < (+ -) < (* / %).
precedences: (Symbol, (Expression, Expression) -> Expression)[][] = [
    [(Symbol.Dpipe, Expression.Or)],
    [(Symbol.Dampersand, Expression.And)],
    [(Symbol.Pipe, Expression.Bor)],
    [(Symbol.Carot, Expression.Bxor)],
    [(Symbol.Ampersand, Expression.Band)],
    [(Symbol.Dequal, Expression.Equal), (Symbol.NotEqual, Expression.NotEqual)],
    [(Symbol.Left, Expression.LessThan), (Symbol.Right, Expression.GreaterThan), (Symbol.LesserEqual, Expression.LessThanOrEqual), (Symbol.GreaterEqual, Expression.GreaterThanOrEqual)],
    [(Symbol.Dleft, Expression.ShiftLeft), (Symbol.Dright, Expression.ShiftRight)],
    [(Symbol.Plus, Expression.Addition), (Symbol.Dash, Expression.Subtract)],
    [(Symbol.Asterisk, Expression.Multiplication), (Symbol.Slash, Expression.Division), (Symbol.Percent, Expression.Modulo)]
];
|
|
|
|
# Precedence-climbing parser for binary operators. `level` indexes into
# `precedences`; past the last level it falls through to parse_unary. Each
# level is left-associative: operands come from level+1 and results fold
# leftward.
func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
    if level >= len(precedences) return parse_unary(lexer);
    left: Expression = parse_expression_at_level(lexer, level+1);
    # The token contents that belong to this precedence level.
    tokens: TokenContents[] = [TokenContents.Symbol(symbol_expressor[0]) for symbol_expressor in precedences[level]];
    expressor: (Expression, Expression) -> Expression;
    while lexer_check_tokens(lexer, tokens) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                # Find this symbol's AST constructor; guaranteed to hit
                # because lexer_check_tokens matched one of `tokens`.
                for symbol_expressor in precedences[level] if symbol_expressor[0] == symbol expressor = symbol_expressor[1];
                left = expressor(left, parse_expression_at_level(lexer, level+1));
            }
            case _ assert false, "Unreachable";
        }
    }
    return left;
}
|
|
|
|
# Parses `cond ? if_true : if_false`. Recursing only on the false branch
# makes chained ternaries right-associative (a? b: c? d: e).
func parse_ternary(lexer: Lexer) -> Expression {
    expression: Expression = parse_expression_at_level(lexer, 0);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark))) return expression;
    if_true: Expression = parse_expression_at_level(lexer, 0);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    if_false: Expression = parse_ternary(lexer);
    return Expression.Ternary(expression, if_true, if_false);
}
|
|
|
|
# Top-level expression entry point: handles `return expr` and lambda
# literals, then delegates to the ternary/precedence chain.
# NOTE(review): this function references Symbol.EqualArrow and
# Expression.Lambda, neither of which is declared in this file's Symbol or
# Expression enums — presumably part of the planned language changes
# mentioned in the commit message; as written this cannot compile. TODO
# confirm and add the missing enum entries (and lexing for "=>").
func parse_expression(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_expression(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
        parameters: TypeDeclaration[];
        # "lambda => body" has no parameters; otherwise parse
        # "name: type, ..." up to the "=>".
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow))) parameters = [];
        else do {
            parameters = [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) parameters = parameters + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
        }
        return Expression.Lambda(parameters, parse_expression(lexer));
    }
    return parse_ternary(lexer);
}
|
|
|
|
# Statement AST. Notable payloads:
#   FunctionDefinition - (name, params, optional return type, body)
#   Assignment         - (target, value, optional declared type)
#   If                 - (condition, then, optional else)
#   DoWhile            - (body, optional trailing while-condition)
#   Match              - (subject, list of (pattern, body) arms)
#   Assert             - (condition, optional message expression)
#   ForLoop            - (variable, iterable, body)
enum Statement {
    Statements(Statement[]),
    EnumDefinition(str, EnumEntry[]),
    StructDefinition(str, TypeDeclaration[]),
    FunctionDefinition(str, TypeDeclaration[], OptionalTypeExpression, Statement),
    Expression(Expression),
    Assignment(Expression, Expression, OptionalTypeExpression),
    TypeDeclaration(TypeDeclaration),
    If(Expression, Statement, OptionalStatement),
    While(Expression, Statement),
    DoWhile(Statement, OptionalExpression),
    Break,
    Continue,
    Match(Expression, (Expression, Statement)[]),
    Assert(Expression, OptionalExpression),
    ForLoop(str, Expression, Statement)
}
|
|
|
|
# Debug rendering of a Statement. Only EnumDefinition is implemented so far;
# everything else asserts.
func statement_to_str(statement: Statement) -> str {
    match statement in {
        case EnumDefinition(name, entries) return "Enum %s" % name;
    }
    assert false, "Unimplemented statement_to_str";
}
|
|
|
|
# Option type for Statement; see OptionalKeyword.
enum OptionalStatement {
    Some(Statement),
    None
}
|
|
|
|
# Consumes the next token and returns its name, asserting it is an
# Identifier.
func parse_identifier(lexer: Lexer) -> str {
    identifier_token: Token = lexer_next_token(lexer);
    match identifier_token.contents in {
        case Identifier(identifier) return identifier;
        case _ assert false, "Expected identifier, but got %s!" % token_to_str(identifier_token);
    }
}
|
|
|
|
# Consumes the next token and returns its integer value, asserting it is a
# Number.
func parse_number(lexer: Lexer) -> int {
    number_token: Token = lexer_next_token(lexer);
    match number_token.contents in {
        case Number(number) return number;
        case _ assert false, "Expected number!";
    }
}
|
|
|
|
# Consumes the next token and returns its string value, asserting it is a
# String literal.
func parse_string(lexer: Lexer) -> str {
    string_token: Token = lexer_next_token(lexer);
    match string_token.contents in {
        case String(string) return string;
        case _ assert false, "Expected string!";
    }
}
|
|
|
|
# True when an expression can be the left-hand side of an assignment:
# a variable or a chain of field accesses rooted in one. Other forms
# (e.g. ArrayAccess) currently assert rather than return false.
func is_valid_target(expression: Expression) -> bool {
    match expression in {
        case FieldAccess(subexpression, _) return is_valid_target(subexpression);
        case Variable(_) return true;
        case _ assert false, "Unimplemented is_valid_target %s" % expression;
    }
}
|
|
|
|
func parse_statement(lexer: Lexer) -> Statement {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
        enum_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.EnumDefinition(enum_name, []);
        enum_entries: EnumEntry[] = [parse_enum_entry(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) enum_entries = enum_entries + [parse_enum_entry(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.EnumDefinition(enum_name, enum_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
        struct_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.StructDefinition(struct_name, []);
        struct_entries: TypeDeclaration[] = [parse_type_declaration(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_entries = struct_entries + [parse_type_declaration(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.StructDefinition(struct_name, struct_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
        function_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
        function_arguments: TypeDeclaration[] = [];
        if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
            function_arguments = function_arguments + [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) function_arguments = function_arguments + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        }
        function_return_type: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) function_return_type = OptionalTypeExpression.Some(parse_type(lexer));
        function_body: Statement = parse_statement(lexer);
        return Statement.FunctionDefinition(function_name, function_arguments, function_return_type, function_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
        return Statement.If(parse_expression(lexer), parse_statement(lexer), is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))? OptionalStatement.Some(parse_statement(lexer)): OptionalStatement.None);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
        value: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        cases: (Expression, Statement)[] = [];
        while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case))) cases = cases + [(parse_expression(lexer), parse_statement(lexer))];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.Match(value, cases);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
        condition: Expression = parse_expression(lexer);
        message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))? OptionalExpression.Some(parse_expression(lexer)): OptionalExpression.None;
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Assert(condition, message);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
        body: Statement = parse_statement(lexer);
        condition: OptionalExpression = OptionalExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
            condition = OptionalExpression.Some(parse_expression(lexer));
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        }
        return Statement.DoWhile(body, condition);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
        return Statement.While(parse_expression(lexer), parse_statement(lexer));
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
        variable: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        expression: Expression = parse_expression(lexer);
        body: Statement = parse_statement(lexer);
        return Statement.ForLoop(variable, expression, body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Continue;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Break;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        statements: Statement[] = [];
        while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) statements = statements + [parse_statement(lexer)];
        return Statement.Statements(statements);
    } else {
        expression: Expression = parse_expression(lexer);
        type_: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
            match expression in {
                case Variable(_) type_ = OptionalTypeExpression.Some(parse_type(lexer));
                case _ assert false, "Invalid target";
            }
        }
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
            assert is_valid_target(expression), "Invalid target!";
            right_expression: Expression = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
            return Statement.Assignment(expression, right_expression, type_);
        }
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        match expression in {
            case Variable(name) match type_ in {
                case Some(type_expression) return Statement.TypeDeclaration(TypeDeclaration{name=name, type_=type_expression});
            }
        }
        return Statement.Expression(expression);
    }
}
|
|
|
|
print("Parsing...\n");
|
|
lexer: Lexer = lexer_from_file("test.pyc");
|
|
statements: Statement[] = [];
|
|
while !is_some_token(lexer_take_token(lexer, TokenContents.Eof)) statements = statements + [parse_statement(lexer)]; |