# python-plus-plus/ppp/ppp_parser.ppp

import "ppp_tokens.ppp";
import "ppp_lexer.ppp";
import "ppp_ast.ppp";
func parse_type_union(lexer: Lexer) -> TypeExpression {
    union_types: TypeExpression[] = [parse_type(lexer)];
    while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Pipe))) union_types = union_types + [parse_type(lexer)];
    if len(union_types) == 1 return union_types[0];
    return TypeExpression.Union(union_types);
}
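
# Parses a primary type: "()" or a parenthesized tuple/union, or a named
# type, followed by any number of postfixes: "[N]" fixed-size array,
# "[]" list, and "[...]" or "<...>" generic specification.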
func parse_type_primary(lexer: Lexer) -> TypeExpression {
    base_type: TypeExpression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]);
        types: TypeExpression[] = [parse_type_union(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) types = types + [parse_type_union(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        if len(types) == 1 do {
            # A parenthesized union is grouping, not a one-element tuple.
            match types[0] in {
                case Union(_) base_type = types[0];
                case _ base_type = TypeExpression.Tuple(types);
            }
        } else base_type = TypeExpression.Tuple(types);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        assert false, "Unimplemented parse_type_primary array";
    } else {
        base_type = TypeExpression.Name(parse_identifier(lexer));
    }
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                match symbol in {
                    case OpenSquare match lexer_peek_token(lexer).contents in {
                        case Number(number) do {
                            lexer_next_token(lexer); # consume the size just peeked
                            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                            base_type = TypeExpression.Array(base_type, number);
                            continue;
                        }
                    }
                }
                closing: Symbol;
                match symbol in {
                    case OpenSquare closing = Symbol.CloseSquare;
                    case Left closing = Symbol.Right;
                    case _ assert false, "Unreachable";
                }
                match symbol in {
                    case OpenSquare if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closing))) do {
                        base_type = TypeExpression.List(base_type);
                        continue;
                    }
                }
                generics: TypeExpression[] = [parse_type(lexer)];
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(closing));
                match base_type in {
                    case Specification(_, _) assert false, "Cannot specify an already specified type";
                }
                base_type = TypeExpression.Specification(base_type, generics);
            }
            case _ assert false, "Unreachable";
        }
    }
    return base_type;
}
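
# Parses a full type, treating "->" as a right-associative function
# arrow; a tuple on the left becomes the parameter list.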
func parse_type(lexer: Lexer) -> TypeExpression {
    base_type: TypeExpression = parse_type_primary(lexer);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return base_type;
    return_type: TypeExpression = parse_type(lexer);
    match base_type in {
        case Tuple(type_expressions) return TypeExpression.Function(type_expressions, return_type);
    }
    return TypeExpression.Function([base_type], return_type);
}
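
# Parses a "name: type" declaration.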
func parse_type_declaration(lexer: Lexer) -> TypeDeclaration {
    entry_name: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    entry_type: TypeExpression = parse_type(lexer);
    return TypeDeclaration{name=entry_name, type_=entry_type};
}
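
# Parses one enum entry: a name, optionally followed by a
# parenthesized list of payload types.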
func parse_enum_entry(lexer: Lexer) -> EnumEntry {
    entry_name: str = parse_identifier(lexer);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) return EnumEntry{name=entry_name, types=[]};
    entry_types: TypeExpression[] = [parse_type(lexer)];
    while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) entry_types = entry_types + [parse_type(lexer)];
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
    return EnumEntry{name=entry_name, types=entry_types};
}
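
# Parses a "field=value" argument inside a struct instantiation.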
func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
    parameter: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
    return (parameter, parse_expression(lexer));
}
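
# Parses a "key: value" pair inside a dictionary literal.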
func parse_dict_entry(lexer: Lexer) -> (Expression, Expression) {
    key: Expression = parse_expression(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    return (key, parse_expression(lexer));
}
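
# Parses an atom (tuple, array or dictionary literal, comprehension,
# string, number, or variable) followed by any chain of postfix
# operations: field access, call, indexing, struct instantiation.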
func parse_primary(lexer: Lexer) -> Expression {
    base_expression: Expression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.Tuple([]);
        else {
            elements: Expression[] = [parse_expression(lexer)];
            singleton: bool = false;
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
                if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
                    # A trailing comma, as in "(x,)", marks a one-element tuple.
                    singleton = true;
                    break;
                }
                elements = elements + [parse_expression(lexer)];
            }
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
            base_expression = singleton || len(elements) > 1? Expression.Tuple(elements): elements[0];
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare))) base_expression = Expression.Array([]);
        else {
            expressions: Expression[] = [parse_expression(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                variable: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                expression: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.LoopComrehension(expressions[0], variable, expression);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) expressions = expressions + [parse_expression(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                base_expression = Expression.Array(expressions);
            }
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) base_expression = Expression.Dictionary([]);
        else {
            expressions: (Expression, Expression)[] = [parse_dict_entry(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                variable: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                expression: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                base_expression = Expression.DictComprehension(expressions[0], variable, expression);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) expressions = expressions + [parse_dict_entry(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                base_expression = Expression.Dictionary(expressions);
            }
        }
    } else {
        token: Token = lexer_next_token(lexer);
        match token.contents in {
            case String(string) base_expression = Expression.String(string);
            case Number(number) base_expression = Expression.Number(number);
            case Identifier(string) base_expression = Expression.Variable(string);
            case _ assert false, "Expected identifier, but got %s!" % token_to_str(token);
        }
    }
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
        # Keep the token so the failure case below reports the token
        # actually seen instead of consuming another.
        postfix_token: Token = lexer_next_token(lexer);
        match postfix_token.contents in {
            case Symbol(symbol) match symbol in {
                case Dot base_expression = Expression.FieldAccess(base_expression, parse_identifier(lexer));
                case Open do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.FunctionCall(base_expression, []);
                    else {
                        arguments: Expression[] = [parse_expression(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) arguments = arguments + [parse_expression(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
                        base_expression = Expression.FunctionCall(base_expression, arguments);
                    }
                }
                case OpenSquare do {
                    index: Expression = parse_expression(lexer);
                    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                    base_expression = Expression.ArrayAccess(base_expression, index);
                }
                case OpenCurly do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) base_expression = Expression.StructInstantiation(base_expression, []);
                    else {
                        struct_arguments: (str, Expression)[] = [parse_struct_argument(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_arguments = struct_arguments + [parse_struct_argument(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                        base_expression = Expression.StructInstantiation(base_expression, struct_arguments);
                    }
                }
                case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(symbol);
            }
            case _ assert false, "Unimplemented parse_primary %s" % token_to_str(postfix_token);
        }
    }
    return base_expression;
}
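
# Parses the prefix operators "~", "!", "+", "-" (and a tight-binding
# "return"), each applied to the unary expression that follows.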
func parse_unary(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde))) return Expression.Bnot(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation))) return Expression.Not(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus))) return Expression.UnaryPlus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash))) return Expression.UnaryMinus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_unary(lexer));
    return parse_primary(lexer);
}
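
# Binary operator precedence table, ordered lowest to highest; each
# level maps its symbols to the AST constructor for that operation.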
precedences: dict[Symbol, (Expression, Expression) -> Expression][] = [
    {Symbol.Dpipe: Expression.Or},
    {Symbol.Dampersand: Expression.And},
    {Symbol.Pipe: Expression.Bor},
    {Symbol.Carot: Expression.Bxor},
    {Symbol.Ampersand: Expression.Band},
    {Symbol.Dequal: Expression.Equal, Symbol.NotEqual: Expression.NotEqual},
    {Symbol.Left: Expression.LessThan, Symbol.Right: Expression.GreaterThan, Symbol.LesserEqual: Expression.LessThanOrEqual, Symbol.GreaterEqual: Expression.GreaterThanOrEqual},
    {Symbol.Dleft: Expression.ShiftLeft, Symbol.Dright: Expression.ShiftRight},
    {Symbol.Plus: Expression.Addition, Symbol.Dash: Expression.Subtract},
    {Symbol.Asterisk: Expression.Multiplication, Symbol.Slash: Expression.Division, Symbol.Percent: Expression.Modulo}
];
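
# Classic precedence climbing: parse the tighter level on the left,
# then fold in left-associative operators from this level's table.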
func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
    if level >= len(precedences) return parse_unary(lexer);
    left: Expression = parse_expression_at_level(lexer, level+1);
    tokens: TokenContents[] = [TokenContents.Symbol(symbol) for symbol in precedences[level]];
    while lexer_check_tokens(lexer, tokens) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(symbol) do {
                expressor: (Expression, Expression) -> Expression = precedences[level][symbol];
                left = expressor(left, parse_expression_at_level(lexer, level+1));
            }
            case _ assert false, "Unreachable";
        }
    }
    return left;
}
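
# Parses "condition? if_true: if_false"; the false branch recurses,
# so chained ternaries are right-associative.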
func parse_ternary(lexer: Lexer) -> Expression {
    expression: Expression = parse_expression_at_level(lexer, 0);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark))) return expression;
    if_true: Expression = parse_expression_at_level(lexer, 0);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    if_false: Expression = parse_ternary(lexer);
    return Expression.Ternary(expression, if_true, if_false);
}
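
# Expression entry point: "return <expr>", a lambda (parameter
# declarations terminated by EqualArrow), or a ternary expression.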
func parse_expression(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_expression(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
        parameters: TypeDeclaration[];
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow))) parameters = [];
        else do {
            parameters = [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) parameters = parameters + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
        }
        return Expression.Lambda(parameters, parse_expression(lexer));
    }
    return parse_ternary(lexer);
}
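
# Consumes the next token and returns its name, failing on
# anything other than an identifier.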
func parse_identifier(lexer: Lexer) -> str {
    identifier_token: Token = lexer_next_token(lexer);
    match identifier_token.contents in {
        case Identifier(identifier) return identifier;
        case _ assert false, "Expected identifier, but got %s!" % token_to_str(identifier_token);
    }
}
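
# Consumes the next token, failing unless it is a number.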
func parse_number(lexer: Lexer) -> int {
    number_token: Token = lexer_next_token(lexer);
    match number_token.contents in {
        case Number(number) return number;
        case _ assert false, "Expected number, but got %s!" % token_to_str(number_token);
    }
}
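
# Consumes the next token, failing unless it is a string.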
func parse_string(lexer: Lexer) -> str {
    string_token: Token = lexer_next_token(lexer);
    match string_token.contents in {
        case String(string) return string;
        case _ assert false, "Expected string, but got %s!" % token_to_str(string_token);
    }
}
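
# Reports whether an expression may appear on the left of "=":
# variables, plus field and index accesses rooted in a valid target.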
func is_valid_target(expression: Expression) -> bool {
    match expression in {
        case FieldAccess(subexpression, _) return is_valid_target(subexpression);
        case Variable(_) return true;
        case ArrayAccess(array, _) return is_valid_target(array);
        case _ assert false, "Unimplemented is_valid_target %s" % expression;
    }
}
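
# Parses a single statement: enum/struct/func definitions, control flow
# (if, match, while, do-while, for), assert, continue/break, import,
# type aliases, "{...}" blocks, and expression statements, which may
# carry a ":" type annotation and/or an "=" assignment.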
func parse_statement(lexer: Lexer) -> Statement {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
        enum_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.EnumDefinition(enum_name, []);
        enum_entries: EnumEntry[] = [parse_enum_entry(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) enum_entries = enum_entries + [parse_enum_entry(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.EnumDefinition(enum_name, enum_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
        struct_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.StructDefinition(struct_name, []);
        struct_entries: TypeDeclaration[] = [parse_type_declaration(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_entries = struct_entries + [parse_type_declaration(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.StructDefinition(struct_name, struct_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
        function_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
        function_arguments: TypeDeclaration[] = [];
        if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
            function_arguments = function_arguments + [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) function_arguments = function_arguments + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        }
        function_return_type: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) function_return_type = OptionalTypeExpression.Some(parse_type(lexer));
        function_body: Statement = parse_statement(lexer);
        return Statement.FunctionDefinition(function_name, function_arguments, function_return_type, function_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
        return Statement.If(parse_expression(lexer), parse_statement(lexer), is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))? OptionalStatement.Some(parse_statement(lexer)): OptionalStatement.None);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
        value: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        cases: (Expression, Statement)[] = [];
        while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case))) cases = cases + [(parse_expression(lexer), parse_statement(lexer))];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.Match(value, cases);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
        condition: Expression = parse_expression(lexer);
        message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))? OptionalExpression.Some(parse_expression(lexer)): OptionalExpression.None;
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Assert(condition, message);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
        body: Statement = parse_statement(lexer);
        condition: OptionalExpression = OptionalExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
            condition = OptionalExpression.Some(parse_expression(lexer));
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        }
        return Statement.DoWhile(body, condition);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
        return Statement.While(parse_expression(lexer), parse_statement(lexer));
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
        variable: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        expression: Expression = parse_expression(lexer);
        body: Statement = parse_statement(lexer);
        return Statement.ForLoop(variable, expression, body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Continue;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Break;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Import))) do {
        file: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Import(file);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Type))) do {
        name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
        type_expression: TypeExpression = parse_type(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.TypeDefinition(name, type_expression);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        statements: Statement[] = [];
        while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) statements = statements + [parse_statement(lexer)];
        return Statement.Statements(statements);
    } else {
        expression: Expression = parse_expression(lexer);
        type_: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
            match expression in {
                case Variable(_) type_ = OptionalTypeExpression.Some(parse_type(lexer));
                case _ assert false, "Invalid target";
            }
        }
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
            assert is_valid_target(expression), "Invalid target!";
            right_expression: Expression = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
            return Statement.Assignment(expression, right_expression, type_);
        }
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        match expression in {
            case Variable(name) match type_ in {
                case Some(type_expression) return Statement.TypeDeclaration(TypeDeclaration{name=name, type_=type_expression});
            }
        }
        return Statement.Expression(expression);
    }
}