import "ppp_tokens.ppp"; import "ppp_lexer.ppp"; import "ppp_ast.ppp"; func parse_type_union(lexer: Lexer) -> TypeExpression { union_types: TypeExpression[] = [parse_type(lexer)]; while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Pipe))) union_types = union_types + [parse_type(lexer)]; if len(union_types) == 1 return union_types[0]; return TypeExpression.Union(union_types); } func parse_type_primary(lexer: Lexer) -> TypeExpression { base_type: TypeExpression; if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do { if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]); types: TypeExpression[] = [parse_type_union(lexer)]; while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) types = types + [parse_type_union(lexer)]; lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close)); if len(types) == 1 do { match types[0] in { case Union(_) base_type = types[0]; case _ base_type = TypeExpression.Tuple(types); } } else base_type = TypeExpression.Tuple(types); } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do { assert false, "Unimplemented parse_type_primary array"; } else { base_type = TypeExpression.Name(parse_identifier(lexer)); } while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do { match lexer_next_token(lexer).contents in { case Symbol(symbol) do { match symbol in { case OpenSquare match lexer_peek_token(lexer).contents in { case Number(number) do { lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare)); base_type = TypeExpression.Array(base_type, number); continue; } } } closing: Symbol; match symbol in { case OpenSquare closing = Symbol.CloseSquare; case Left closing = Symbol.Right; case _ assert false, "Unreachable"; } match symbol in { case OpenSquare if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closing))) do { base_type = TypeExpression.List(base_type); continue; } } generics: TypeExpression[] = [parse_type(lexer)]; while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)]; lexer_assert_token(lexer, TokenContents.Symbol(closing)); match base_type in { case Specification(_, _) assert false, "Cannot specify an already specified type"; } base_type = TypeExpression.Specification(base_type, generics); } case _ assert false, "Unreachable"; } } return base_type; } func parse_type(lexer: Lexer) -> TypeExpression { base_type: TypeExpression = parse_type_primary(lexer); if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return base_type; return_type: TypeExpression = parse_type(lexer); match base_type in { case Tuple(type_expressions) return TypeExpression.Function(type_expressions, return_type); } return TypeExpression.Function([base_type], return_type); } func parse_type_declaration(lexer: Lexer) -> TypeDeclaration { entry_name: str = parse_identifier(lexer); lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon)); entry_type: TypeExpression = parse_type(lexer); return TypeDeclaration{name=entry_name, type_=entry_type}; } func parse_enum_entry(lexer: Lexer) -> EnumEntry { entry_name: str = parse_identifier(lexer); if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) return EnumEntry{name=entry_name, types=[]}; entry_types: TypeExpression[] = [parse_type(lexer)]; while is_some_token(lexer_take_token(lexer, 
func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
    parameter: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
    return (parameter, parse_expression(lexer));
}

func parse_dict_entry(lexer: Lexer) -> (Expression, Expression) {
    key: Expression = parse_expression(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    return (key, parse_expression(lexer));
}

func parse_primary(lexer: Lexer) -> Expression {
    base_expression: Expression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close)))
            base_expression = Expression.Tuple([]);
        else {
            elements: Expression[] = [parse_expression(lexer)];
            singleton: bool = false;
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
                if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
                    singleton = true;
                    break;
                }
                elements = elements + [parse_expression(lexer)];
            }
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
            base_expression = singleton || len(elements) > 1? Expression.Tuple(elements): elements[0];
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare)))
            base_expression = Expression.Array([]);
        else {
            expressions: Expression[] = [parse_expression(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                variable: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                expression: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.LoopComprehension(expressions[0], variable, expression);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
                    expressions = expressions + [parse_expression(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.Array(expressions);
            }
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly)))
            base_expression = Expression.Dictionary([]);
        else {
            expressions: (Expression, Expression)[] = [parse_dict_entry(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                variable: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                expression: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                base_expression = Expression.DictComprehension(expressions[0], variable, expression);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
                    expressions = expressions + [parse_dict_entry(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                base_expression = Expression.Dictionary(expressions);
            }
        }
    } else {
        token: Token = lexer_next_token(lexer);
        match token.contents in {
            case String(string) base_expression = Expression.String(string);
            case Number(number) base_expression = Expression.Number(number);
            case Identifier(string) base_expression = Expression.Variable(string);
            case _ assert false, "Expected identifier, but got %s!" % token_to_str(token);
        }
    }
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare),
                                     TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) match symbol in {
                case Dot base_expression = Expression.FieldAccess(base_expression, parse_identifier(lexer));
                case Open do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close)))
                        base_expression = Expression.FunctionCall(base_expression, []);
                    else {
                        arguments: Expression[] = [parse_expression(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
                            arguments = arguments + [parse_expression(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
                        base_expression = Expression.FunctionCall(base_expression, arguments);
                    }
                }
                case OpenSquare do {
                    index: Expression = parse_expression(lexer);
                    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                    base_expression = Expression.ArrayAccess(base_expression, index);
                }
                case OpenCurly do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly)))
                        base_expression = Expression.StructInstantiation(base_expression, []);
                    else {
                        struct_arguments: (str, Expression)[] = [parse_struct_argument(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
                            struct_arguments = struct_arguments + [parse_struct_argument(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                        base_expression = Expression.StructInstantiation(base_expression, struct_arguments);
                    }
                }
                case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(symbol);
            }
            case _ assert false, "Unimplemented parse_primary %s" % token_to_str(lexer_next_token(lexer));
        }
    }
    return base_expression;
}
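
// Postfix forms compose left-to-right in the loop above, e.g. (informally):
//   point.x        =>  FieldAccess(Variable("point"), "x")
//   f(1, 2)        =>  FunctionCall(Variable("f"), [Number(1), Number(2)])
//   xs[i]          =>  ArrayAccess(Variable("xs"), Variable("i"))
//   Vec{x=1, y=2}  =>  StructInstantiation(Variable("Vec"), [("x", ...), ("y", ...)])
// so xs[i].f(1) parses as FunctionCall(FieldAccess(ArrayAccess(...), "f"), ...).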
func parse_unary(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde)))
        return Expression.Bnot(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation)))
        return Expression.Not(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus)))
        return Expression.UnaryPlus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash)))
        return Expression.UnaryMinus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return)))
        return Expression.Return(parse_unary(lexer));
    return parse_primary(lexer);
}

precedences: dict[Symbol, (Expression, Expression) -> Expression][] = [
    {Symbol.Dpipe: Expression.Or},
    {Symbol.Dampersand: Expression.And},
    {Symbol.Pipe: Expression.Bor},
    {Symbol.Carot: Expression.Bxor},
    {Symbol.Ampersand: Expression.Band},
    {Symbol.Dequal: Expression.Equal, Symbol.NotEqual: Expression.NotEqual},
    {Symbol.Left: Expression.LessThan, Symbol.Right: Expression.GreaterThan,
     Symbol.LesserEqual: Expression.LessThanOrEqual, Symbol.GreaterEqual: Expression.GreaterThanOrEqual},
    {Symbol.Dleft: Expression.ShiftLeft, Symbol.Dright: Expression.ShiftRight},
    {Symbol.Plus: Expression.Addition, Symbol.Dash: Expression.Subtract},
    {Symbol.Asterisk: Expression.Multiplication, Symbol.Slash: Expression.Division, Symbol.Percent: Expression.Modulo}
];

func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
    if level >= len(precedences) return parse_unary(lexer);
    left: Expression = parse_expression_at_level(lexer, level+1);
    tokens: TokenContents[] = [TokenContents.Symbol(symbol) for symbol in precedences[level]];
    while lexer_check_tokens(lexer, tokens) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                expressor: (Expression, Expression) -> Expression = precedences[level][symbol];
                left = expressor(left, parse_expression_at_level(lexer, level+1));
            }
            case _ assert false, "Unreachable";
        }
    }
    return left;
}
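
// parse_expression_at_level is a precedence climber over the table above:
// level 0 binds loosest (||) and the last level binds tightest (*, /, %), and
// every level is left-associative. For example:
//   1 + 2 * 3  =>  Addition(Number(1), Multiplication(Number(2), Number(3)))
//   1 - 2 - 3  =>  Subtract(Subtract(Number(1), Number(2)), Number(3))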
func parse_ternary(lexer: Lexer) -> Expression {
    expression: Expression = parse_expression_at_level(lexer, 0);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark)))
        return expression;
    if_true: Expression = parse_expression_at_level(lexer, 0);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    if_false: Expression = parse_ternary(lexer);
    return Expression.Ternary(expression, if_true, if_false);
}

func parse_expression(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return)))
        return Expression.Return(parse_expression(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
        parameters: TypeDeclaration[];
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow)))
            parameters = [];
        else do {
            parameters = [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
                parameters = parameters + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
        }
        return Expression.Lambda(parameters, parse_expression(lexer));
    }
    return parse_ternary(lexer);
}

func parse_identifier(lexer: Lexer) -> str {
    identifier_token: Token = lexer_next_token(lexer);
    match identifier_token.contents in {
        case Identifier(identifier) return identifier;
        case _ assert false, "Expected identifier, but got %s!" % token_to_str(identifier_token);
    }
}
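
// parse_expression layers return, lambda and the right-associative ternary on
// top of the binary levels (surface syntax inferred from the tokens matched
// above):
//   return x                =>  Expression.Return(...)
//   lambda x: int => x + 1  =>  Expression.Lambda([TypeDeclaration{...}], ...)
//   a? b: c                 =>  Expression.Ternary(...)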
func parse_number(lexer: Lexer) -> int {
    number_token: Token = lexer_next_token(lexer);
    match number_token.contents in {
        case Number(number) return number;
        case _ assert false, "Expected number!";
    }
}

func parse_string(lexer: Lexer) -> str {
    string_token: Token = lexer_next_token(lexer);
    match string_token.contents in {
        case String(string) return string;
        case _ assert false, "Expected string!";
    }
}

func is_valid_target(expression: Expression) -> bool {
    match expression in {
        case FieldAccess(subexpression, _) return is_valid_target(subexpression);
        case Variable(_) return true;
        case ArrayAccess(array, _) return is_valid_target(array);
        case _ assert false, "Unimplemented is_valid_target %s" % expression;
    }
}
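
// is_valid_target accepts exactly the l-value shapes the assignment branch of
// parse_statement below relies on: a variable, or a chain of field accesses
// and index expressions rooted in one, e.g. xs[0].field.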
func parse_statement(lexer: Lexer) -> Statement {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
        enum_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly)))
            return Statement.EnumDefinition(enum_name, []);
        enum_entries: EnumEntry[] = [parse_enum_entry(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
            enum_entries = enum_entries + [parse_enum_entry(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.EnumDefinition(enum_name, enum_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
        struct_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly)))
            return Statement.StructDefinition(struct_name, []);
        struct_entries: TypeDeclaration[] = [parse_type_declaration(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
            struct_entries = struct_entries + [parse_type_declaration(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.StructDefinition(struct_name, struct_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
        function_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
        function_arguments: TypeDeclaration[] = [];
        if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
            function_arguments = function_arguments + [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))
                function_arguments = function_arguments + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        }
        function_return_type: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow)))
            function_return_type = OptionalTypeExpression.Some(parse_type(lexer));
        function_body: Statement = parse_statement(lexer);
        return Statement.FunctionDefinition(function_name, function_arguments, function_return_type, function_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
        return Statement.If(
            parse_expression(lexer),
            parse_statement(lexer),
            is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))?
                OptionalStatement.Some(parse_statement(lexer)):
                OptionalStatement.None
        );
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
        value: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        cases: (Expression, Statement)[] = [];
        while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case)))
            cases = cases + [(parse_expression(lexer), parse_statement(lexer))];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.Match(value, cases);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
        condition: Expression = parse_expression(lexer);
        message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))?
            OptionalExpression.Some(parse_expression(lexer)):
            OptionalExpression.None;
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Assert(condition, message);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
        body: Statement = parse_statement(lexer);
        condition: OptionalExpression = OptionalExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
            condition = OptionalExpression.Some(parse_expression(lexer));
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        }
        return Statement.DoWhile(body, condition);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
        return Statement.While(parse_expression(lexer), parse_statement(lexer));
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
        variable: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        expression: Expression = parse_expression(lexer);
        body: Statement = parse_statement(lexer);
        return Statement.ForLoop(variable, expression, body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Continue;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Break;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Import))) do {
        file: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Import(file);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Type))) do {
        name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
        type_expression: TypeExpression = parse_type(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.TypeDefinition(name, type_expression);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        statements: Statement[] = [];
        while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly)))
            statements = statements + [parse_statement(lexer)];
        return Statement.Statements(statements);
    } else {
        expression: Expression = parse_expression(lexer);
        type_: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
            match expression in {
                case Variable(_) type_ = OptionalTypeExpression.Some(parse_type(lexer));
                case _ assert false, "Invalid target";
            }
        }
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
            assert is_valid_target(expression), "Invalid target!";
            right_expression: Expression = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
            return Statement.Assignment(expression, right_expression, type_);
        }
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        match expression in {
            case Variable(name) match type_ in {
                case Some(type_expression) return Statement.TypeDeclaration(TypeDeclaration{name=name, type_=type_expression});
            }
        }
        return Statement.Expression(expression);
    }
}
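
// A minimal usage sketch (kept commented out: lexer_from_file and lexer_done
// are hypothetical names, not functions defined in the imports above):
//
// func parse_file(path: str) -> Statement[] {
//     lexer: Lexer = lexer_from_file(path);
//     statements: Statement[] = [];
//     while !lexer_done(lexer) statements = statements + [parse_statement(lexer)];
//     return statements;
// }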