python-plus-plus/ppp/ppp-whole.ppp

enum Keyword {
Enum,
Struct,
Func,
If,
Else,
While,
Break,
Continue,
Do,
For,
To,
In,
Match,
Case,
Assert,
Return,
Lambda
}
enum OptionalKeyword {
Some(Keyword),
None
}
func keyword_from_str(keyword: str) -> OptionalKeyword {
if keyword == "enum" return OptionalKeyword.Some(Keyword.Enum);
if keyword == "struct" return OptionalKeyword.Some(Keyword.Struct);
if keyword == "func" return OptionalKeyword.Some(Keyword.Func);
if keyword == "if" return OptionalKeyword.Some(Keyword.If);
if keyword == "else" return OptionalKeyword.Some(Keyword.Else);
if keyword == "while" return OptionalKeyword.Some(Keyword.While);
if keyword == "break" return OptionalKeyword.Some(Keyword.Break);
if keyword == "continue" return OptionalKeyword.Some(Keyword.Continue);
if keyword == "do" return OptionalKeyword.Some(Keyword.Do);
if keyword == "for" return OptionalKeyword.Some(Keyword.For);
if keyword == "to" return OptionalKeyword.Some(Keyword.To);
if keyword == "in" return OptionalKeyword.Some(Keyword.In);
if keyword == "match" return OptionalKeyword.Some(Keyword.Match);
if keyword == "case" return OptionalKeyword.Some(Keyword.Case);
if keyword == "assert" return OptionalKeyword.Some(Keyword.Assert);
if keyword == "return" return OptionalKeyword.Some(Keyword.Return);
if keyword == "lambda" return OptionalKeyword.Some(Keyword.Lambda);
return OptionalKeyword.None;
}
func keyword_to_str(keyword: Keyword) -> str {
match keyword in {
case Enum return "enum";
case Struct return "struct";
case Func return "func";
case If return "if";
case Else return "else";
case While return "while";
case Break return "break";
case Continue return "continue";
case Do return "do";
case For return "for";
case To return "to";
case In return "in";
case Match return "match";
case Case return "case";
case Assert return "assert";
case Return return "return";
case Lambda return "lambda";
}
assert false, "Invalid keyword";
}
enum Symbol {
Open,
Close,
OpenCurly,
CloseCurly,
Comma,
OpenSquare,
CloseSquare,
Colon,
Left,
Right,
Arrow,
EqualArrow,
Semicolon,
Equal,
Dequal,
Exclamation,
NotEqual,
Dot,
Plus,
Dash,
Asterisk,
Dasterisk,
Slash,
QuestionMark,
Ampersand,
Dampersand,
Pipe,
Dpipe,
Dleft,
Dright,
GreaterEqual,
LesserEqual,
Percent,
Tilde,
Caret
}
enum OptionalSymbol {
Some(Symbol),
None
}
func symbol_from_str(symbol: str) -> OptionalSymbol {
if symbol == "(" return OptionalSymbol.Some(Symbol.Open);
if symbol == ")" return OptionalSymbol.Some(Symbol.Close);
if symbol == "{" return OptionalSymbol.Some(Symbol.OpenCurly);
if symbol == "}" return OptionalSymbol.Some(Symbol.CloseCurly);
if symbol == "," return OptionalSymbol.Some(Symbol.Comma);
if symbol == "[" return OptionalSymbol.Some(Symbol.OpenSquare);
if symbol == "]" return OptionalSymbol.Some(Symbol.CloseSquare);
if symbol == ":" return OptionalSymbol.Some(Symbol.Colon);
if symbol == "<" return OptionalSymbol.Some(Symbol.Left);
if symbol == ">" return OptionalSymbol.Some(Symbol.Right);
if symbol == "->" return OptionalSymbol.Some(Symbol.Arrow);
if symbol == ";" return OptionalSymbol.Some(Symbol.Semicolon);
if symbol == "=" return OptionalSymbol.Some(Symbol.Equal);
if symbol == "==" return OptionalSymbol.Some(Symbol.Dequal);
if symbol == "!" return OptionalSymbol.Some(Symbol.Exclamation);
if symbol == "!=" return OptionalSymbol.Some(Symbol.NotEqual);
if symbol == "." return OptionalSymbol.Some(Symbol.Dot);
if symbol == "+" return OptionalSymbol.Some(Symbol.Plus);
if symbol == "-" return OptionalSymbol.Some(Symbol.Dash);
if symbol == "*" return OptionalSymbol.Some(Symbol.Asterisk);
if symbol == "**" return OptionalSymbol.Some(Symbol.Dasterisk);
if symbol == "/" return OptionalSymbol.Some(Symbol.Slash);
if symbol == "?" return OptionalSymbol.Some(Symbol.QuestionMark);
if symbol == "&" return OptionalSymbol.Some(Symbol.Ampersand);
if symbol == "&&" return OptionalSymbol.Some(Symbol.Dampersand);
if symbol == "|" return OptionalSymbol.Some(Symbol.Pipe);
if symbol == "||" return OptionalSymbol.Some(Symbol.Dpipe);
if symbol == "<<" return OptionalSymbol.Some(Symbol.Dleft);
if symbol == ">>" return OptionalSymbol.Some(Symbol.Dright);
if symbol == ">=" return OptionalSymbol.Some(Symbol.GreaterEqual);
if symbol == "<=" return OptionalSymbol.Some(Symbol.LesserEqual);
if symbol == "%" return OptionalSymbol.Some(Symbol.Percent);
if symbol == "~" return OptionalSymbol.Some(Symbol.Tilde);
if symbol == "^" return OptionalSymbol.Some(Symbol.Carot);
assert false, "Unimplemented symbol '%s'" % symbol;
}
func symbol_to_str(symbol: Symbol) -> str {
match symbol in {
case Open return "(";
case Close return ")";
case OpenCurly return "{";
case CloseCurly return "}";
case Comma return ",";
case OpenSquare return "[";
case CloseSquare return "]";
case Colon return ":";
case Left return "<";
case Right return ">";
case Arrow return "->";
case EqualArrow return "=>";
case Semicolon return ";";
case Equal return "=";
case Dequal return "==";
case Exclamation return "!";
case NotEqual return "!=";
case Dot return ".";
case Plus return "+";
case Dash return "-";
case Asterisk return "*";
case Dasterisk return "**";
case Slash return "/";
case QuestionMark return "?";
case Ampersand return "&";
case Dampersand return "&&";
case Pipe return "|";
case Dpipe return "||";
case Dleft return "<<";
case Dright return ">>";
case GreaterEqual return ">=";
case LesserEqual return "<=";
case Percent return "%";
case Tilde return "~";
case Caret return "^";
}
assert false, "Invalid symbol";
}
enum TokenContents {
Keyword(Keyword),
Identifier(str),
Number(int),
String(str),
Symbol(Symbol),
Eof
}
func token_contents_to_str(token: TokenContents) -> str {
match token in {
case Keyword(keyword) return "Keyword(%s)" % keyword_to_str(keyword);
case Identifier(string) return "Identifier(%s)" % string;
case Number(number) return "Number(%d)" % number;
case String(string) return "String(\"%s\")" % string;
case Symbol(symbol) return "Symbol('%s')" % symbol_to_str(symbol);
case Eof return "Eof";
}
}
struct Token {
line: int,
col: int,
value: str,
contents: TokenContents
}
func token_to_str(token: Token) -> str {
return token_contents_to_str(token.contents)+"{:"+int_to_str(token.line)+":"+int_to_str(token.col)+"}";
}
enum OptionalToken {
Some(Token),
None
}
func is_some_token(maybe_token: OptionalToken) -> bool {
match maybe_token in {
case Some(_) return true;
case None return false;
}
assert false, "Unreachable";
}
struct Lexer {
source: str,
location: int,
line: int,
col: int,
peeked_token: OptionalToken
}
func new_lexer(source: str) -> Lexer {
return Lexer{
source = source,
location = 0,
line = 1,
col = 0,
peeked_token = OptionalToken.None
};
}
func lexer_from_file(path: str) -> Lexer return new_lexer(read(path));
func is_space(char: str) -> bool {
return char == " " || char == "\t" || char == "\n";
}
func is_digit(char: str) -> bool {
return char == "0" || char == "1" || char == "2" || char == "3" || char == "4" || char == "5" || char == "6" || char == "7" || char == "8" || char == "9";
}
func is_alpha(char: str) -> bool {
return char == "a" || char == "b" || char == "c" || char == "d" || char == "e" || char == "f" || char == "g" || char == "h" || char == "i" || char == "j" || char == "k" || char == "l" || char == "m" || char == "n" || char == "o" || char == "p" || char == "q" || char == "r" || char == "s" || char == "t" || char == "u" || char == "v" || char == "w" || char == "x" || char == "y" || char == "z" || char == "A" || char == "B" || char == "C" || char == "D" || char == "E" || char == "F" || char == "G" || char == "H" || char == "I" || char == "J" || char == "K" || char == "L" || char == "M" || char == "N" || char == "O" || char == "P" || char == "Q" || char == "R" || char == "S" || char == "T" || char == "U" || char == "V" || char == "W" || char == "X" || char == "Y" || char == "Z" || char == "_";
}
func lexer_next_token(lexer: Lexer) -> Token {
match lexer.peeked_token in {
case Some(token) do {
lexer.peeked_token = OptionalToken.None;
return token;
}
}
while lexer.location < len(lexer.source) && is_space(lexer.source[lexer.location]) do {
if lexer.source[lexer.location] == "\n" do {
lexer.line = lexer.line + 1;
lexer.col = 0;
}
lexer.location = lexer.location + 1;
}
if lexer.location >= len(lexer.source) return Token{line=lexer.line, col=lexer.col, value="\0", contents=TokenContents.Eof};
if is_digit(lexer.source[lexer.location]) do {
number_str: str = "";
while lexer.location < len(lexer.source) && is_digit(lexer.source[lexer.location]) do {
number_str = number_str + lexer.source[lexer.location];
lexer.location = lexer.location + 1;
}
number: int = str_to_int(number_str);
return Token{line=lexer.line, col=lexer.col, value=number_str, contents=TokenContents.Number(number)};
} else if is_alpha(lexer.source[lexer.location]) do {
word_str: str = "";
while lexer.location < len(lexer.source) && is_alpha(lexer.source[lexer.location]) do {
word_str = word_str + lexer.source[lexer.location];
lexer.location = lexer.location + 1;
}
match keyword_from_str(word_str) in {
case Some(keyword) return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Keyword(keyword)};
case None return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Identifier(word_str)};
}
assert false, "Identifier";
} else if lexer.source[lexer.location] == "\"" do {
lexer.location = lexer.location + 1;
string_str: str = "";
escaping: bool = false;
while lexer.location < len(lexer.source) && (lexer.source[lexer.location] != "\"" || escaping) do {
escaping = escaping? false: lexer.source[lexer.location] == "\\";
string_str = string_str + lexer.source[lexer.location];
lexer.location = lexer.location + 1;
}
lexer.location = lexer.location + 1;
return Token{line=lexer.line, col=lexer.col, value="\""+string_str+"\"", contents=TokenContents.String(string_str)};
} else if lexer.source[lexer.location] == "|" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "|" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="||", contents=TokenContents.Symbol(Symbol.Dpipe)};
} else if lexer.source[lexer.location] == "&" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "&" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="&&", contents=TokenContents.Symbol(Symbol.Dampersand)};
} else if lexer.source[lexer.location] == "*" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "*" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="**", contents=TokenContents.Symbol(Symbol.Dasterisk)};
} else if lexer.source[lexer.location] == "-" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="->", contents=TokenContents.Symbol(Symbol.Arrow)};
} else if lexer.source[lexer.location] == ">" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value=">=", contents=TokenContents.Symbol(Symbol.GreaterEqual)};
} else if lexer.source[lexer.location] == "<" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="<=", contents=TokenContents.Symbol(Symbol.LesserEqual)};
} else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="==", contents=TokenContents.Symbol(Symbol.Dequal)};
} else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="=>", contents=TokenContents.Symbol(Symbol.EqualArrow)};
} else if lexer.source[lexer.location] == "!" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
lexer.location = lexer.location + 2;
return Token{line=lexer.line, col=lexer.col, value="!=", contents=TokenContents.Symbol(Symbol.NotEqual)};
} else {
match symbol_from_str(lexer.source[lexer.location]) in {
case Some(symbol) do {
lexer.location = lexer.location + 1;
return Token{line=lexer.line, col=lexer.col, value=lexer.source[lexer.location-1], contents=TokenContents.Symbol(symbol)};
}
case None assert false, "Unimplemented, '%s'" % lexer.source[lexer.location];
}
assert false, "Unreachable Symbol";
}
assert false, "Unreachable, next_token";
}
func lexer_peek_token(lexer: Lexer) -> Token {
match lexer.peeked_token in {
case Some(token) return token;
case None do {
token: Token = lexer_next_token(lexer);
lexer.peeked_token = OptionalToken.Some(token);
return token;
}
}
assert false, "Unreachable";
}
func lexer_check_token(lexer: Lexer, expected: TokenContents) -> bool {
token: Token = lexer_peek_token(lexer);
return token.contents == expected;
}
func lexer_take_token(lexer: Lexer, token: TokenContents) -> OptionalToken {
if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
return OptionalToken.None;
}
func lexer_take_tokens(lexer: Lexer, tokens: TokenContents[]) -> OptionalToken {
for token in tokens do {
if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
}
return OptionalToken.None;
}
func lexer_assert_token(lexer: Lexer, expected: TokenContents) -> Token {
token: Token = lexer_next_token(lexer);
assert token.contents == expected, "Expected %s but got %s!" % (token_contents_to_str(expected), token_to_str(token));
return token;
}
func lexer_check_tokens(lexer: Lexer, tokens: TokenContents[]) -> bool {
for token in tokens if lexer_check_token(lexer, token) return true;
return false;
}
enum TypeExpression {
Tuple(TypeExpression[]),
Union(TypeExpression[]),
List(TypeExpression),
Array(TypeExpression, int),
Name(str),
Specification(TypeExpression, TypeExpression[]),
Function(TypeExpression[], TypeExpression)
}
enum OptionalTypeExpression {
Some(TypeExpression),
None
}
func parse_type_primary(lexer: Lexer) -> TypeExpression {
base_type: TypeExpression;
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]);
types: TypeExpression[] = [parse_type(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) types = types + [parse_type(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
base_type = TypeExpression.Tuple(types);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
assert false, "Unimplemented parse_type_primary array";
} else {
base_type = TypeExpression.Name(parse_identifier(lexer));
}
closing: Symbol;
while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do {
match lexer_next_token(lexer).contents in {
case Symbol(symbol) do {
match symbol in {
case OpenSquare match lexer_peek_token(lexer).contents in {
case Number(number) do {
lexer_next_token(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_type = TypeExpression.Array(base_type, number);
continue;
}
}
}
match symbol in {
case OpenSquare closing = Symbol.CloseSquare;
case Left closing = Symbol.Right;
case _ assert false, "Unreachable";
}
match symbol in {
case OpenSquare if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closing))) do {
base_type = TypeExpression.List(base_type);
continue;
}
}
generics: TypeExpression[] = [parse_type(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(closing));
match base_type in {
case Specification assert false, "Cannot specify an already specified type";
}
base_type = TypeExpression.Specification(base_type, generics);
}
case _ assert false, "Unreachable";
}
}
return base_type;
}
func parse_type(lexer: Lexer) -> TypeExpression {
base_type: TypeExpression = parse_type_primary(lexer);
if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return base_type;
return_type: TypeExpression = parse_type(lexer);
match base_type in {
case Tuple(type_expressions) return TypeExpression.Function(type_expressions, return_type);
}
return TypeExpression.Function([base_type], return_type);
}
struct TypeDeclaration {
name: str,
type_: TypeExpression
}
func parse_type_declaration(lexer: Lexer) -> TypeDeclaration {
entry_name: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
entry_type: TypeExpression = parse_type(lexer);
return TypeDeclaration{name=entry_name, type_=entry_type};
}
enum EnumEntry {
Const(str),
Tuple(str, TypeExpression[]),
Struct(str, TypeDeclaration[])
}
func parse_enum_entry(lexer: Lexer) -> EnumEntry {
entry_name: str = parse_identifier(lexer);
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
entry_types: TypeExpression[] = [parse_type(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) entry_types = entry_types + [parse_type(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
return EnumEntry.Tuple(entry_name, entry_types);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return EnumEntry.Struct(entry_name, []);
assert false, "Unimplemented parse_enum_entry";
}
return EnumEntry.Const(entry_name);
}
enum Expression {
FunctionCall(Expression, Expression[]),
Variable(str),
ArrayAccess(Expression, Expression),
Array(Expression[]),
FieldAccess(Expression, str),
Number(int),
String(str),
Tuple(Expression[]),
StructInstantiation(Expression, (str, Expression)[]),
LoopComprehension(Expression, str, Expression),
Return(Expression),
Ternary(Expression, Expression, Expression),
Or(Expression, Expression),
And(Expression, Expression),
Bor(Expression, Expression),
Bxor(Expression, Expression),
Band(Expression, Expression),
Equal(Expression, Expression),
NotEqual(Expression, Expression),
LessThan(Expression, Expression),
GreaterThan(Expression, Expression),
LessThanOrEqual(Expression, Expression),
GreaterThanOrEqual(Expression, Expression),
ShiftLeft(Expression, Expression),
ShiftRight(Expression, Expression),
Addition(Expression, Expression),
Subtract(Expression, Expression),
Multiplication(Expression, Expression),
Division(Expression, Expression),
Modulo(Expression, Expression),
Bnot(Expression),
Not(Expression),
UnaryPlus(Expression),
UnaryMinus(Expression),
Lambda(TypeDeclaration[], Expression)
}
enum OptionalExpression {
Some(Expression),
None
}
func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
parameter: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
return (parameter, parse_expression(lexer));
}
func parse_primary(lexer: Lexer) -> Expression {
base_expression: Expression;
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.Tuple([]);
else {
elements: Expression[] = [parse_expression(lexer)];
singleton: bool = false;
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
singleton = true;
break;
}
elements = elements + [parse_expression(lexer)];
}
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
base_expression = singleton || len(elements) > 1? Expression.Tuple(elements): elements[0];
}
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare))) base_expression = Expression.Array([]);
else {
expressions: Expression[] = [parse_expression(lexer)];
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
variable: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
expression: Expression = parse_expression(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_expression = Expression.LoopComprehension(expressions[0], variable, expression);
} else {
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) expressions = expressions + [parse_expression(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_expression = Expression.Array(expressions);
}
}
} else {
match lexer_next_token(lexer).contents in {
case String(string) base_expression = Expression.String(string);
case Number(number) base_expression = Expression.Number(number);
case Identifier(string) base_expression = Expression.Variable(string);
case _token assert false, "Expected identifier, but got %s!" % token_contents_to_str(_token);
}
}
while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
next_token: Token = lexer_next_token(lexer);
match next_token.contents in {
case Symbol(symbol) match symbol in {
case Dot base_expression = Expression.FieldAccess(base_expression, parse_identifier(lexer));
case Open do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.FunctionCall(base_expression, []);
else {
arguments: Expression[] = [parse_expression(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) arguments = arguments + [parse_expression(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
base_expression = Expression.FunctionCall(base_expression, arguments);
}
}
case OpenSquare do {
index: Expression = parse_expression(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_expression = Expression.ArrayAccess(base_expression, index);
}
case OpenCurly do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) base_expression = Expression.StructInstantiation(base_expression, []);
else {
struct_arguments: (str, Expression)[] = [parse_struct_argument(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_arguments = struct_arguments + [parse_struct_argument(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
base_expression = Expression.StructInstantiation(base_expression, struct_arguments);
}
}
case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(symbol);
}
case _ assert false, "Unimplemented parse_primary %s" % token_to_str(lexer_next_token(lexer));
}
}
return base_expression;
}
func parse_unary(lexer: Lexer) -> Expression {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde))) return Expression.Bnot(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation))) return Expression.Not(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus))) return Expression.UnaryPlus(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash))) return Expression.UnaryMinus(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_unary(lexer));
return parse_primary(lexer);
}
precedences: (Symbol, (Expression, Expression) -> Expression)[][] = [
[(Symbol.Dpipe, Expression.Or)],
[(Symbol.Dampersand, Expression.And)],
[(Symbol.Pipe, Expression.Bor)],
[(Symbol.Caret, Expression.Bxor)],
[(Symbol.Ampersand, Expression.Band)],
[(Symbol.Dequal, Expression.Equal), (Symbol.NotEqual, Expression.NotEqual)],
[(Symbol.Left, Expression.LessThan), (Symbol.Right, Expression.GreaterThan), (Symbol.LesserEqual, Expression.LessThanOrEqual), (Symbol.GreaterEqual, Expression.GreaterThanOrEqual)],
[(Symbol.Dleft, Expression.ShiftLeft), (Symbol.Dright, Expression.ShiftRight)],
[(Symbol.Plus, Expression.Addition), (Symbol.Dash, Expression.Subtract)],
[(Symbol.Asterisk, Expression.Multiplication), (Symbol.Slash, Expression.Division), (Symbol.Percent, Expression.Modulo)]
];
func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
if level >= len(precedences) return parse_unary(lexer);
left: Expression = parse_expression_at_level(lexer, level+1);
tokens: TokenContents[] = [TokenContents.Symbol(symbol_expressor[0]) for symbol_expressor in precedences[level]];
expressor: (Expression, Expression) -> Expression;
while lexer_check_tokens(lexer, tokens) do {
match lexer_next_token(lexer).contents in {
case Symbol(symbol) do {
for symbol_expressor in precedences[level] if symbol_expressor[0] == symbol expressor = symbol_expressor[1];
left = expressor(left, parse_expression_at_level(lexer, level+1));
}
case _ assert false, "Unreachable";
}
}
return left;
}
func parse_ternary(lexer: Lexer) -> Expression {
expression: Expression = parse_expression_at_level(lexer, 0);
if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark))) return expression;
if_true: Expression = parse_expression_at_level(lexer, 0);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
if_false: Expression = parse_ternary(lexer);
return Expression.Ternary(expression, if_true, if_false);
}
func parse_expression(lexer: Lexer) -> Expression {
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_expression(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
parameters: TypeDeclaration[];
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow))) parameters = [];
else do {
parameters = [parse_type_declaration(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) parameters = parameters + [parse_type_declaration(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
}
return Expression.Lambda(parameters, parse_expression(lexer));
}
return parse_ternary(lexer);
}
enum Statement {
Statements(Statement[]),
EnumDefinition(str, EnumEntry[]),
StructDefinition(str, TypeDeclaration[]),
FunctionDefinition(str, TypeDeclaration[], OptionalTypeExpression, Statement),
Expression(Expression),
Assignment(Expression, Expression, OptionalTypeExpression),
TypeDeclaration(TypeDeclaration),
If(Expression, Statement, OptionalStatement),
While(Expression, Statement),
DoWhile(Statement, OptionalExpression),
Break,
Continue,
Match(Expression, (Expression, Statement)[]),
Assert(Expression, OptionalExpression),
ForLoop(str, Expression, Statement)
}
func statement_to_str(statement: Statement) -> str {
match statement in {
case EnumDefinition(name, entries) return "Enum %s" % name;
}
assert false, "Unimplemented statement_to_str";
}
enum OptionalStatement {
Some(Statement),
None
}
func parse_identifier(lexer: Lexer) -> str {
identifier_token: Token = lexer_next_token(lexer);
match identifier_token.contents in {
case Identifier(identifier) return identifier;
case _ assert false, "Expected identifier, but got %s!" % token_to_str(identifier_token);
}
}
func parse_number(lexer: Lexer) -> int {
number_token: Token = lexer_next_token(lexer);
match number_token.contents in {
case Number(number) return number;
case _ assert false, "Expected number!";
}
}
func parse_string(lexer: Lexer) -> str {
string_token: Token = lexer_next_token(lexer);
match string_token.contents in {
case String(string) return string;
case _ assert false, "Expected string!";
}
}
func is_valid_target(expression: Expression) -> bool {
match expression in {
case FieldAccess(subexpression, _) return is_valid_target(subexpression);
case Variable(_) return true;
case _ assert false, "Unimplemented is_valid_target %s" % expression;
}
}
func parse_statement(lexer: Lexer) -> Statement {
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
enum_name: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.EnumDefinition(enum_name, []);
enum_entries: EnumEntry[] = [parse_enum_entry(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) enum_entries = enum_entries + [parse_enum_entry(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
return Statement.EnumDefinition(enum_name, enum_entries);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
struct_name: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.StructDefinition(struct_name, []);
struct_entries: TypeDeclaration[] = [parse_type_declaration(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_entries = struct_entries + [parse_type_declaration(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
return Statement.StructDefinition(struct_name, struct_entries);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
function_name: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
function_arguments: TypeDeclaration[] = [];
if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
function_arguments = function_arguments + [parse_type_declaration(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) function_arguments = function_arguments + [parse_type_declaration(lexer)];
}
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
function_return_type: OptionalTypeExpression = OptionalTypeExpression.None;
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) function_return_type = OptionalTypeExpression.Some(parse_type(lexer));
function_body: Statement = parse_statement(lexer);
return Statement.FunctionDefinition(function_name, function_arguments, function_return_type, function_body);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
return Statement.If(parse_expression(lexer), parse_statement(lexer), is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))? OptionalStatement.Some(parse_statement(lexer)): OptionalStatement.None);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
value: Expression = parse_expression(lexer);
lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
cases: (Expression, Statement)[] = [];
while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case))) cases = cases + [(parse_expression(lexer), parse_statement(lexer))];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
return Statement.Match(value, cases);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
condition: Expression = parse_expression(lexer);
message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))? OptionalExpression.Some(parse_expression(lexer)): OptionalExpression.None;
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
return Statement.Assert(condition, message);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
body: Statement = parse_statement(lexer);
condition: OptionalExpression = OptionalExpression.None;
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
condition = OptionalExpression.Some(parse_expression(lexer));
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
}
return Statement.DoWhile(body, condition);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
return Statement.While(parse_expression(lexer), parse_statement(lexer));
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
variable: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
expression: Expression = parse_expression(lexer);
body: Statement = parse_statement(lexer);
return Statement.ForLoop(variable, expression, body);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
return Statement.Continue;
} else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
return Statement.Break;
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
statements: Statement[] = [];
while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) statements = statements + [parse_statement(lexer)];
return Statement.Statements(statements);
} else {
expression: Expression = parse_expression(lexer);
type_: OptionalTypeExpression = OptionalTypeExpression.None;
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
match expression in {
case Variable(_) type_ = OptionalTypeExpression.Some(parse_type(lexer));
case _ assert false, "Invalid target";
}
}
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
assert is_valid_target(expression), "Invalid target!";
right_expression: Expression = parse_expression(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
return Statement.Assignment(expression, right_expression, type_);
}
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
match expression in {
case Variable(name) match type_ in {
case Some(type_expression) return Statement.TypeDeclaration(TypeDeclaration{name=name, type_=type_expression});
}
}
return Statement.Expression(expression);
}
}
print("Parsing...\n");
lexer: Lexer = lexer_from_file("test.pyc");
statements: Statement[] = [];
while !is_some_token(lexer_take_token(lexer, TokenContents.Eof)) statements = statements + [parse_statement(lexer)];