enum Keyword {
    Enum, Struct, Func, If, Else, While, Break, Continue, Do, For, To, In, Match, Case, Assert, Return, Lambda
}

enum OptionalKeyword { Some(Keyword), None }

func keyword_from_str(keyword: str) -> OptionalKeyword {
    if keyword == "enum" return OptionalKeyword.Some(Keyword.Enum);
    if keyword == "struct" return OptionalKeyword.Some(Keyword.Struct);
    if keyword == "func" return OptionalKeyword.Some(Keyword.Func);
    if keyword == "if" return OptionalKeyword.Some(Keyword.If);
    if keyword == "else" return OptionalKeyword.Some(Keyword.Else);
    if keyword == "while" return OptionalKeyword.Some(Keyword.While);
    if keyword == "break" return OptionalKeyword.Some(Keyword.Break);
    if keyword == "continue" return OptionalKeyword.Some(Keyword.Continue);
    if keyword == "do" return OptionalKeyword.Some(Keyword.Do);
    if keyword == "for" return OptionalKeyword.Some(Keyword.For);
    if keyword == "to" return OptionalKeyword.Some(Keyword.To);
    if keyword == "in" return OptionalKeyword.Some(Keyword.In);
    if keyword == "match" return OptionalKeyword.Some(Keyword.Match);
    if keyword == "case" return OptionalKeyword.Some(Keyword.Case);
    if keyword == "assert" return OptionalKeyword.Some(Keyword.Assert);
    if keyword == "return" return OptionalKeyword.Some(Keyword.Return);
    if keyword == "lambda" return OptionalKeyword.Some(Keyword.Lambda);
    return OptionalKeyword.None;
}

func keyword_to_str(keyword: Keyword) -> str {
    match keyword in {
        case Enum return "enum";
        case Struct return "struct";
        case Func return "func";
        case If return "if";
        case Else return "else";
        case While return "while";
        case Break return "break";
        case Continue return "continue";
        case Do return "do";
        case For return "for";
        case To return "to";
        case In return "in";
        case Match return "match";
        case Case return "case";
        case Assert return "assert";
        case Return return "return";
        case Lambda return "lambda";
    }
    assert false, "Invalid keyword";
}

enum Symbol {
    Open, Close, OpenCurly, CloseCurly, Comma, OpenSquare, CloseSquare, Colon, Left, Right, Arrow, EqualArrow,
    Semicolon, Equal, Dequal, Exclamation, NotEqual, Dot, Plus, Dash, Asterisk, Dasterisk, Slash, QuestionMark,
    Ampersand, Dampersand, Pipe, Dpipe, Dleft, Dright, GreaterEqual, LesserEqual, Percent, Tilde, Caret
}

enum OptionalSymbol { Some(Symbol), None }

func symbol_from_str(symbol: str) -> OptionalSymbol {
    if symbol == "(" return OptionalSymbol.Some(Symbol.Open);
    if symbol == ")" return OptionalSymbol.Some(Symbol.Close);
    if symbol == "{" return OptionalSymbol.Some(Symbol.OpenCurly);
    if symbol == "}" return OptionalSymbol.Some(Symbol.CloseCurly);
    if symbol == "," return OptionalSymbol.Some(Symbol.Comma);
    if symbol == "[" return OptionalSymbol.Some(Symbol.OpenSquare);
    if symbol == "]" return OptionalSymbol.Some(Symbol.CloseSquare);
    if symbol == ":" return OptionalSymbol.Some(Symbol.Colon);
    if symbol == "<" return OptionalSymbol.Some(Symbol.Left);
    if symbol == ">" return OptionalSymbol.Some(Symbol.Right);
    if symbol == "->" return OptionalSymbol.Some(Symbol.Arrow);
    if symbol == "=>" return OptionalSymbol.Some(Symbol.EqualArrow);
    if symbol == ";" return OptionalSymbol.Some(Symbol.Semicolon);
    if symbol == "=" return OptionalSymbol.Some(Symbol.Equal);
    if symbol == "==" return OptionalSymbol.Some(Symbol.Dequal);
    if symbol == "!" return OptionalSymbol.Some(Symbol.Exclamation);
    if symbol == "!=" return OptionalSymbol.Some(Symbol.NotEqual);
    if symbol == "."
        return OptionalSymbol.Some(Symbol.Dot);
    if symbol == "+" return OptionalSymbol.Some(Symbol.Plus);
    if symbol == "-" return OptionalSymbol.Some(Symbol.Dash);
    if symbol == "*" return OptionalSymbol.Some(Symbol.Asterisk);
    if symbol == "**" return OptionalSymbol.Some(Symbol.Dasterisk);
    if symbol == "/" return OptionalSymbol.Some(Symbol.Slash);
    if symbol == "?" return OptionalSymbol.Some(Symbol.QuestionMark);
    if symbol == "&" return OptionalSymbol.Some(Symbol.Ampersand);
    if symbol == "&&" return OptionalSymbol.Some(Symbol.Dampersand);
    if symbol == "|" return OptionalSymbol.Some(Symbol.Pipe);
    if symbol == "||" return OptionalSymbol.Some(Symbol.Dpipe);
    if symbol == "<<" return OptionalSymbol.Some(Symbol.Dleft);
    if symbol == ">>" return OptionalSymbol.Some(Symbol.Dright);
    if symbol == ">=" return OptionalSymbol.Some(Symbol.GreaterEqual);
    if symbol == "<=" return OptionalSymbol.Some(Symbol.LesserEqual);
    if symbol == "%" return OptionalSymbol.Some(Symbol.Percent);
    if symbol == "~" return OptionalSymbol.Some(Symbol.Tilde);
    if symbol == "^" return OptionalSymbol.Some(Symbol.Caret);
    assert false, "Unimplemented symbol '%s'" % symbol;
}

func symbol_to_str(symbol: Symbol) -> str {
    match symbol in {
        case Open return "(";
        case Close return ")";
        case OpenCurly return "{";
        case CloseCurly return "}";
        case Comma return ",";
        case OpenSquare return "[";
        case CloseSquare return "]";
        case Colon return ":";
        case Left return "<";
        case Right return ">";
        case Arrow return "->";
        case EqualArrow return "=>";
        case Semicolon return ";";
        case Equal return "=";
        case Dequal return "==";
        case Exclamation return "!";
        case NotEqual return "!=";
        case Dot return ".";
        case Plus return "+";
        case Dash return "-";
        case Asterisk return "*";
        case Dasterisk return "**";
        case Slash return "/";
        case QuestionMark return "?";
        case Ampersand return "&";
        case Dampersand return "&&";
        case Pipe return "|";
        case Dpipe return "||";
        case Dleft return "<<";
        case Dright return ">>";
        case GreaterEqual return ">=";
        case LesserEqual return "<=";
        case Percent return "%";
        case Tilde return "~";
        case Caret return "^";
    }
    assert false, "Invalid symbol";
}

enum TokenContents { Keyword(Keyword), Identifier(str), Number(int), String(str), Symbol(Symbol), Eof }

func token_contents_to_str(token: TokenContents) -> str {
    match token in {
        case Keyword(keyword) return "Keyword(%s)" % keyword_to_str(keyword);
        case Identifier(string) return "Identifier(%s)" % string;
        case Number(number) return "Number(%d)" % number;
        case String(string) return "String(\"%s\")" % string;
        case Symbol(symbol) return "Symbol('%s')" % symbol_to_str(symbol);
        case Eof return "Eof";
    }
}

struct Token { line: int, col: int, value: str, contents: TokenContents }

func token_to_str(token: Token) -> str {
    return token_contents_to_str(token.contents) + "{:" + int_to_str(token.line) + ":" + int_to_str(token.col) + "}";
}

enum OptionalToken { Some(Token), None }

func is_some_token(maybe_token: OptionalToken) -> bool {
    match maybe_token in {
        case Some(_) return true;
        case None return false;
    }
    assert false, "Unreachable";
}

struct Lexer { source: str, location: int, line: int, col: int, peeked_token: OptionalToken }

func new_lexer(source: str) -> Lexer {
    return Lexer{ source = source, location = 0, line = 1, col = 0, peeked_token = OptionalToken.None };
}

func lexer_from_file(path: str) -> Lexer return new_lexer(read(path));

func is_space(char: str) -> bool {
    return char == " " || char == "\t" || char == "\n";
}

func is_digit(char: str) -> bool {
    return char == "0" || char == "1" || char == "2" || char == "3" || char == "4" || char == "5" ||
char == "6" || char == "7" || char == "8" || char == "9"; } func is_alpha(char: str) -> bool { return char == "a" || char == "b" || char == "c" || char == "d" || char == "e" || char == "f" || char == "g" || char == "h" || char == "i" || char == "j" || char == "k" || char == "l" || char == "m" || char == "n" || char == "o" || char == "p" || char == "q" || char == "r" || char == "s" || char == "t" || char == "u" || char == "v" || char == "w" || char == "x" || char == "y" || char == "z" || char == "A" || char == "B" || char == "C" || char == "D" || char == "E" || char == "F" || char == "G" || char == "H" || char == "I" || char == "J" || char == "K" || char == "L" || char == "M" || char == "N" || char == "O" || char == "P" || char == "Q" || char == "R" || char == "S" || char == "T" || char == "U" || char == "V" || char == "W" || char == "X" || char == "Y" || char == "Z" || char == "_"; } func lexer_next_token(lexer: Lexer) -> Token { match lexer.peeked_token in { case Some(token) do { lexer.peeked_token = OptionalToken.None; return token; } } while lexer.location < len(lexer.source) && is_space(lexer.source[lexer.location]) do { if lexer.source[lexer.location] == "\n" do { lexer.line = lexer.line + 1; lexer.col = 0; } lexer.location = lexer.location + 1; } if lexer.location >= len(lexer.source) return Token{line=lexer.line, col=lexer.col, value="\0", contents=TokenContents.Eof}; if is_digit(lexer.source[lexer.location]) do { number_str: str = ""; while lexer.location < len(lexer.source) && is_digit(lexer.source[lexer.location]) do { number_str = number_str + lexer.source[lexer.location]; lexer.location = lexer.location + 1; } number: int = str_to_int(number_str); return Token{line=lexer.line, col=lexer.col, value=number_str, contents=TokenContents.Number(number)}; } else if is_alpha(lexer.source[lexer.location]) do { word_str: str = ""; while lexer.location < len(lexer.source) && is_alpha(lexer.source[lexer.location]) do { word_str = word_str + lexer.source[lexer.location]; lexer.location = lexer.location + 1; } match keyword_from_str(word_str) in { case Some(keyword) return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Keyword(keyword)}; case None return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Identifier(word_str)}; } assert false, "Identifier"; } else if lexer.source[lexer.location] == "\"" do { lexer.location = lexer.location + 1; string_str: str = ""; escaping: bool = false; while lexer.location < len(lexer.source) && (lexer.source[lexer.location] != "\"" || escaping) do { escaping = escaping? 
                false: lexer.source[lexer.location] == "\\";
            string_str = string_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        lexer.location = lexer.location + 1;
        return Token{line=lexer.line, col=lexer.col, value="\"" + string_str + "\"", contents=TokenContents.String(string_str)};
    } else if lexer.source[lexer.location] == "|" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "|" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="||", contents=TokenContents.Symbol(Symbol.Dpipe)};
    } else if lexer.source[lexer.location] == "&" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "&" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="&&", contents=TokenContents.Symbol(Symbol.Dampersand)};
    } else if lexer.source[lexer.location] == "*" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "*" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="**", contents=TokenContents.Symbol(Symbol.Dasterisk)};
    } else if lexer.source[lexer.location] == "-" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="->", contents=TokenContents.Symbol(Symbol.Arrow)};
    } else if lexer.source[lexer.location] == ">" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value=">=", contents=TokenContents.Symbol(Symbol.GreaterEqual)};
    } else if lexer.source[lexer.location] == "<" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="<=", contents=TokenContents.Symbol(Symbol.LesserEqual)};
    } else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="==", contents=TokenContents.Symbol(Symbol.Dequal)};
    } else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="=>", contents=TokenContents.Symbol(Symbol.EqualArrow)};
    } else if lexer.source[lexer.location] == "!"
               && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="!=", contents=TokenContents.Symbol(Symbol.NotEqual)};
    } else {
        match symbol_from_str(lexer.source[lexer.location]) in {
            case Some(symbol) do {
                lexer.location = lexer.location + 1;
                return Token{line=lexer.line, col=lexer.col, value=lexer.source[lexer.location-1], contents=TokenContents.Symbol(symbol)};
            }
            case None assert false, "Unimplemented, '%s'" % lexer.source[lexer.location];
        }
        assert false, "Unreachable Symbol";
    }
    assert false, "Unreachable, next_token";
}

func lexer_peek_token(lexer: Lexer) -> Token {
    match lexer.peeked_token in {
        case Some(token) return token;
        case None do {
            token: Token = lexer_next_token(lexer);
            lexer.peeked_token = OptionalToken.Some(token);
            return token;
        }
    }
    assert false, "Unreachable";
}

func lexer_check_token(lexer: Lexer, expected: TokenContents) -> bool {
    token: Token = lexer_peek_token(lexer);
    return token.contents == expected;
}

func lexer_take_token(lexer: Lexer, token: TokenContents) -> OptionalToken {
    if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
    return OptionalToken.None;
}

func lexer_take_tokens(lexer: Lexer, tokens: TokenContents[]) -> OptionalToken {
    for token in tokens do {
        if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
    }
    return OptionalToken.None;
}

func lexer_assert_token(lexer: Lexer, expected: TokenContents) -> Token {
    token: Token = lexer_next_token(lexer);
    assert token.contents == expected, "Expected %s but got %s!" % (token_contents_to_str(expected), token_to_str(token));
    return token;
}

func lexer_check_tokens(lexer: Lexer, tokens: TokenContents[]) -> bool {
    for token in tokens if lexer_check_token(lexer, token) return true;
    return false;
}

enum TypeExpression {
    Tuple(TypeExpression[]), Union(TypeExpression[]), List(TypeExpression), Array(TypeExpression, int),
    Name(str), Specification(TypeExpression, TypeExpression[]), Function(TypeExpression[], TypeExpression)
}

enum OptionalTypeExpression { Some(TypeExpression), None }

func parse_type_primary(lexer: Lexer) -> TypeExpression {
    base_type: TypeExpression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]);
        types: TypeExpression[] = [parse_type(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) types = types + [parse_type(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        base_type = TypeExpression.Tuple(types);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        assert false, "Unimplemented parse_type_primary array";
    } else {
        base_type = TypeExpression.Name(parse_identifier(lexer));
    }
    closing: Symbol;
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                match symbol in {
                    case OpenSquare match lexer_peek_token(lexer).contents in {
                        case Number(number) do {
                            lexer_next_token(lexer);
                            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                            base_type = TypeExpression.Array(base_type, number);
                            continue;
                        }
                    }
                }
                match symbol in {
                    case OpenSquare closing = Symbol.CloseSquare;
                    case Left closing = Symbol.Right;
                    case _ assert false, "Unreachable";
                }
                match symbol in {
                    case OpenSquare if
                        is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closing))) do {
                            base_type = TypeExpression.List(base_type);
                            continue;
                        }
                }
                generics: TypeExpression[] = [parse_type(lexer)];
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(closing));
                match base_type in {
                    case Specification assert false, "Cannot specify an already specified type";
                }
                base_type = TypeExpression.Specification(base_type, generics);
            }
            case _ assert false, "Unreachable";
        }
    }
    return base_type;
}

func parse_type(lexer: Lexer) -> TypeExpression {
    base_type: TypeExpression = parse_type_primary(lexer);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return base_type;
    return_type: TypeExpression = parse_type(lexer);
    match base_type in {
        case Tuple(type_expressions) return TypeExpression.Function(type_expressions, return_type);
    }
    return TypeExpression.Function([base_type], return_type);
}

struct TypeDeclaration { name: str, type_: TypeExpression }

func parse_type_declaration(lexer: Lexer) -> TypeDeclaration {
    entry_name: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    entry_type: TypeExpression = parse_type(lexer);
    return TypeDeclaration{name=entry_name, type_=entry_type};
}

enum EnumEntry { Const(str), Tuple(str, TypeExpression[]), Struct(str, TypeDeclaration[]) }

func parse_enum_entry(lexer: Lexer) -> EnumEntry {
    entry_name: str = parse_identifier(lexer);
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        entry_types: TypeExpression[] = [parse_type(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) entry_types = entry_types + [parse_type(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        return EnumEntry.Tuple(entry_name, entry_types);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return EnumEntry.Struct(entry_name, []);
        assert false, "Unimplemented parse_enum_entry";
    }
    return EnumEntry.Const(entry_name);
}

enum Expression {
    FunctionCall(Expression, Expression[]), Variable(str), ArrayAccess(Expression, Expression), Array(Expression[]),
    FieldAccess(Expression, str), Number(int), String(str), Tuple(Expression[]),
    StructInstantiation(Expression, (str, Expression)[]), LoopComprehension(Expression, str, Expression),
    Lambda(TypeDeclaration[], Expression), Return(Expression), Ternary(Expression, Expression, Expression),
    Or(Expression, Expression), And(Expression, Expression), Bor(Expression, Expression), Bxor(Expression, Expression),
    Band(Expression, Expression), Equal(Expression, Expression), NotEqual(Expression, Expression),
    LessThan(Expression, Expression), GreaterThan(Expression, Expression), LessThanOrEqual(Expression, Expression),
    GreaterThanOrEqual(Expression, Expression), ShiftLeft(Expression, Expression), ShiftRight(Expression, Expression),
    Addition(Expression, Expression), Subtract(Expression, Expression), Multiplication(Expression, Expression),
    Division(Expression, Expression), Modulo(Expression, Expression),
    Bnot(Expression), Not(Expression), UnaryPlus(Expression), UnaryMinus(Expression)
}

enum OptionalExpression { Some(Expression), None }

func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
    parameter: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
    return (parameter,
        parse_expression(lexer));
}

func parse_primary(lexer: Lexer) -> Expression {
    base_expression: Expression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close)))
            base_expression = Expression.Tuple([]);
        else {
            elements: Expression[] = [parse_expression(lexer)];
            singleton: bool = false;
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
                if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
                    singleton = true;
                    break;
                }
                elements = elements + [parse_expression(lexer)];
            }
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
            base_expression = singleton || len(elements) > 1? Expression.Tuple(elements): elements[0];
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare)))
            base_expression = Expression.Array([]);
        else {
            expressions: Expression[] = [parse_expression(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                variable: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                expression: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.LoopComprehension(expressions[0], variable, expression);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) expressions = expressions + [parse_expression(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.Array(expressions);
            }
        }
    } else {
        match lexer_next_token(lexer).contents in {
            case String(string) base_expression = Expression.String(string);
            case Number(number) base_expression = Expression.Number(number);
            case Identifier(string) base_expression = Expression.Variable(string);
            case _token assert false, "Expected identifier, but got %s!"
                % token_contents_to_str(_token);
        }
    }
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare),
                                     TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) match symbol in {
                case Dot base_expression = Expression.FieldAccess(base_expression, parse_identifier(lexer));
                case Open do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close)))
                        base_expression = Expression.FunctionCall(base_expression, []);
                    else {
                        arguments: Expression[] = [parse_expression(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) arguments = arguments + [parse_expression(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
                        base_expression = Expression.FunctionCall(base_expression, arguments);
                    }
                }
                case OpenSquare do {
                    index: Expression = parse_expression(lexer);
                    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                    base_expression = Expression.ArrayAccess(base_expression, index);
                }
                case OpenCurly do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly)))
                        base_expression = Expression.StructInstantiation(base_expression, []);
                    else {
                        struct_arguments: (str, Expression)[] = [parse_struct_argument(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_arguments = struct_arguments + [parse_struct_argument(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                        base_expression = Expression.StructInstantiation(base_expression, struct_arguments);
                    }
                }
                case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(symbol);
            }
            case _token assert false, "Unimplemented parse_primary %s" % token_contents_to_str(_token);
        }
    }
    return base_expression;
}

func parse_unary(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde))) return Expression.Bnot(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation))) return Expression.Not(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus))) return Expression.UnaryPlus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash))) return Expression.UnaryMinus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_unary(lexer));
    return parse_primary(lexer);
}

precedences: (Symbol, (Expression, Expression) -> Expression)[][] = [
    [(Symbol.Dpipe, Expression.Or)],
    [(Symbol.Dampersand, Expression.And)],
    [(Symbol.Pipe, Expression.Bor)],
    [(Symbol.Caret, Expression.Bxor)],
    [(Symbol.Ampersand, Expression.Band)],
    [(Symbol.Dequal, Expression.Equal), (Symbol.NotEqual, Expression.NotEqual)],
    [(Symbol.Left, Expression.LessThan), (Symbol.Right, Expression.GreaterThan), (Symbol.LesserEqual, Expression.LessThanOrEqual), (Symbol.GreaterEqual, Expression.GreaterThanOrEqual)],
    [(Symbol.Dleft, Expression.ShiftLeft), (Symbol.Dright, Expression.ShiftRight)],
    [(Symbol.Plus, Expression.Addition), (Symbol.Dash, Expression.Subtract)],
    [(Symbol.Asterisk, Expression.Multiplication), (Symbol.Slash, Expression.Division), (Symbol.Percent, Expression.Modulo)]
];

func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
    if level >= len(precedences) return parse_unary(lexer);
    left: Expression = parse_expression_at_level(lexer, level+1);
    tokens:
        TokenContents[] = [TokenContents.Symbol(symbol_expressor[0]) for symbol_expressor in precedences[level]];
    expressor: (Expression, Expression) -> Expression;
    while lexer_check_tokens(lexer, tokens) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                for symbol_expressor in precedences[level] if symbol_expressor[0] == symbol expressor = symbol_expressor[1];
                left = expressor(left, parse_expression_at_level(lexer, level+1));
            }
            case _ assert false, "Unreachable";
        }
    }
    return left;
}

func parse_ternary(lexer: Lexer) -> Expression {
    expression: Expression = parse_expression_at_level(lexer, 0);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark))) return expression;
    if_true: Expression = parse_expression_at_level(lexer, 0);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    if_false: Expression = parse_ternary(lexer);
    return Expression.Ternary(expression, if_true, if_false);
}

func parse_expression(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_expression(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
        parameters: TypeDeclaration[];
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow)))
            parameters = [];
        else do {
            parameters = [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) parameters = parameters + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
        }
        return Expression.Lambda(parameters, parse_expression(lexer));
    }
    return parse_ternary(lexer);
}

enum Statement {
    Statements(Statement[]), EnumDefinition(str, EnumEntry[]), StructDefinition(str, TypeDeclaration[]),
    FunctionDefinition(str, TypeDeclaration[], OptionalTypeExpression, Statement), Expression(Expression),
    Assignment(Expression, Expression, OptionalTypeExpression), TypeDeclaration(TypeDeclaration),
    If(Expression, Statement, OptionalStatement), While(Expression, Statement), DoWhile(Statement, OptionalExpression),
    Break, Continue, Match(Expression, (Expression, Statement)[]), Assert(Expression, OptionalExpression),
    ForLoop(str, Expression, Statement)
}

func statement_to_str(statement: Statement) -> str {
    match statement in {
        case EnumDefinition(name, entries) return "Enum %s" % name;
    }
    assert false, "Unimplemented statement_to_str";
}

enum OptionalStatement { Some(Statement), None }

func parse_identifier(lexer: Lexer) -> str {
    identifier_token: Token = lexer_next_token(lexer);
    match identifier_token.contents in {
        case Identifier(identifier) return identifier;
        case _ assert false, "Expected identifier, but got %s!"
            % token_to_str(identifier_token);
    }
}

func parse_number(lexer: Lexer) -> int {
    number_token: Token = lexer_next_token(lexer);
    match number_token.contents in {
        case Number(number) return number;
        case _ assert false, "Expected number!";
    }
}

func parse_string(lexer: Lexer) -> str {
    string_token: Token = lexer_next_token(lexer);
    match string_token.contents in {
        case String(string) return string;
        case _ assert false, "Expected string!";
    }
}

func is_valid_target(expression: Expression) -> bool {
    match expression in {
        case FieldAccess(subexpression, _) return is_valid_target(subexpression);
        case Variable(_) return true;
        case _ assert false, "Unimplemented is_valid_target %s" % expression;
    }
}

func parse_statement(lexer: Lexer) -> Statement {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
        enum_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.EnumDefinition(enum_name, []);
        enum_entries: EnumEntry[] = [parse_enum_entry(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) enum_entries = enum_entries + [parse_enum_entry(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.EnumDefinition(enum_name, enum_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
        struct_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.StructDefinition(struct_name, []);
        struct_entries: TypeDeclaration[] = [parse_type_declaration(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_entries = struct_entries + [parse_type_declaration(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.StructDefinition(struct_name, struct_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
        function_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
        function_arguments: TypeDeclaration[] = [];
        if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
            function_arguments = function_arguments + [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) function_arguments = function_arguments + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        }
        function_return_type: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) function_return_type = OptionalTypeExpression.Some(parse_type(lexer));
        function_body: Statement = parse_statement(lexer);
        return Statement.FunctionDefinition(function_name, function_arguments, function_return_type, function_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
        return Statement.If(parse_expression(lexer), parse_statement(lexer),
            is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))?
                OptionalStatement.Some(parse_statement(lexer)): OptionalStatement.None);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
        value: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        cases: (Expression, Statement)[] = [];
        while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case))) cases = cases + [(parse_expression(lexer), parse_statement(lexer))];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.Match(value, cases);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
        condition: Expression = parse_expression(lexer);
        message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))?
            OptionalExpression.Some(parse_expression(lexer)): OptionalExpression.None;
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Assert(condition, message);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
        body: Statement = parse_statement(lexer);
        condition: OptionalExpression = OptionalExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
            condition = OptionalExpression.Some(parse_expression(lexer));
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        }
        return Statement.DoWhile(body, condition);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
        return Statement.While(parse_expression(lexer), parse_statement(lexer));
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
        variable: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        expression: Expression = parse_expression(lexer);
        body: Statement = parse_statement(lexer);
        return Statement.ForLoop(variable, expression, body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Continue;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Break;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        statements: Statement[] = [];
        while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) statements = statements + [parse_statement(lexer)];
        return Statement.Statements(statements);
    } else {
        expression: Expression = parse_expression(lexer);
        type_: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
            match expression in {
                case Variable(_) type_ = OptionalTypeExpression.Some(parse_type(lexer));
                case _ assert false, "Invalid target";
            }
        }
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
            assert is_valid_target(expression), "Invalid target!";
            right_expression: Expression = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
            return Statement.Assignment(expression, right_expression, type_);
        }
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        match expression in {
            case Variable(name) match type_ in {
                case Some(type_expression) return Statement.TypeDeclaration(TypeDeclaration{name=name,
                    type_=type_expression});
            }
        }
        return Statement.Expression(expression);
    }
}

print("Parsing...\n");
lexer: Lexer = lexer_from_file("test.pyc");
statements: Statement[] = [];
while !is_some_token(lexer_take_token(lexer, TokenContents.Eof))
    statements = statements + [parse_statement(lexer)];