Initial commit

This is the state of this project as it was back at the end of March,
when I stopped working on it. I left markdown files as documentation
for myself of the features I still wanted to implement.
This commit is contained in:
germax26 2024-08-08 21:54:03 +10:00
commit 7c1ce16f4b
Signed by: germax26
SSH Key Fingerprint: SHA256:N3w+8798IMWBt7SYH8G1C0iJlIa2HIIcRCXwILT5FvM
25 changed files with 4901 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
__pycache__
.mypy_cache

14
TODO.md Normal file
View File

@ -0,0 +1,14 @@
# TODO:
- Evaluate values knowing their types
- Generics
- Syntax sugar for +=
- Unions
- Matching unions (maybe do match cases with an 'as' keyword + type after each case)
- Dictionaries
- Preprocessor?
- Maybe have breaks and continues as values.
# DONE
- Scopes & contexts
- Importing

11
dict.ppp Normal file
View File

@ -0,0 +1,11 @@
func doubles(a: int) -> int -> int {
    assert a == 2;
    func doubler(b: int) -> int {
        return b*2+a;
    }
    return doubler;
}
doubler_: int -> int = doubles(2);
debug_print(doubler_(5));

874
ppp-whole.ppp Normal file
View File

@ -0,0 +1,874 @@
enum Keyword {
Enum,
Struct,
Func,
If,
Else,
While,
Break,
Continue,
Do,
For,
To,
In,
Match,
Case,
Assert,
Return,
Lambda
}
enum OptionalKeyword {
Some(Keyword),
None
}
func keyword_from_str(keyword: str) -> OptionalKeyword {
if keyword == "enum" return OptionalKeyword.Some(Keyword.Enum);
if keyword == "struct" return OptionalKeyword.Some(Keyword.Struct);
if keyword == "func" return OptionalKeyword.Some(Keyword.Func);
if keyword == "if" return OptionalKeyword.Some(Keyword.If);
if keyword == "else" return OptionalKeyword.Some(Keyword.Else);
if keyword == "while" return OptionalKeyword.Some(Keyword.While);
if keyword == "break" return OptionalKeyword.Some(Keyword.Break);
if keyword == "continue" return OptionalKeyword.Some(Keyword.Continue);
if keyword == "do" return OptionalKeyword.Some(Keyword.Do);
if keyword == "for" return OptionalKeyword.Some(Keyword.For);
if keyword == "to" return OptionalKeyword.Some(Keyword.To);
if keyword == "in" return OptionalKeyword.Some(Keyword.In);
if keyword == "match" return OptionalKeyword.Some(Keyword.Match);
if keyword == "case" return OptionalKeyword.Some(Keyword.Case);
if keyword == "assert" return OptionalKeyword.Some(Keyword.Assert);
if keyword == "return" return OptionalKeyword.Some(Keyword.Return);
if keyword == "lambda" return OptionalKeyword.Some(Keyword.Lambda);
return OptionalKeyword.None;
}
func keyword_to_str(keyword: Keyword) -> str {
match keyword in {
case Enum return "enum";
case Struct return "struct";
case Func return "func";
case If return "if";
case Else return "else";
case While return "while";
case Break return "break";
case Continue return "continue";
case Do return "do";
case For return "for";
case To return "to";
case In return "in";
case Match return "match";
case Case return "case";
case Assert return "assert";
case Return return "return";
case Lambda return "lambda";
}
assert false, "Invalid keyword";
}
enum Symbol {
    Open,
    Close,
    OpenCurly,
    CloseCurly,
    Comma,
    OpenSquare,
    CloseSquare,
    Colon,
    Left,
    Right,
    Arrow,
    Semicolon,
    Equal,
    Dequal,
    Exclamation,
    NotEqual,
    Dot,
    Plus,
    Dash,
    Asterisk,
    Dasterisk,
    Slash,
    QuestionMark,
    Ampersand,
    Dampersand,
    Pipe,
    Dpipe,
    Dleft,
    Dright,
    GreaterEqual,
    LesserEqual,
    Percent,
    Tilde,
    Carot,
    EqualArrow
}
enum OptionalSymbol {
Some(Symbol),
None
}
func symbol_from_str(symbol: str) -> OptionalSymbol {
    if symbol == "(" return OptionalSymbol.Some(Symbol.Open);
    if symbol == ")" return OptionalSymbol.Some(Symbol.Close);
    if symbol == "{" return OptionalSymbol.Some(Symbol.OpenCurly);
    if symbol == "}" return OptionalSymbol.Some(Symbol.CloseCurly);
    if symbol == "," return OptionalSymbol.Some(Symbol.Comma);
    if symbol == "[" return OptionalSymbol.Some(Symbol.OpenSquare);
    if symbol == "]" return OptionalSymbol.Some(Symbol.CloseSquare);
    if symbol == ":" return OptionalSymbol.Some(Symbol.Colon);
    if symbol == "<" return OptionalSymbol.Some(Symbol.Left);
    if symbol == ">" return OptionalSymbol.Some(Symbol.Right);
    if symbol == "->" return OptionalSymbol.Some(Symbol.Arrow);
    if symbol == ";" return OptionalSymbol.Some(Symbol.Semicolon);
    if symbol == "=" return OptionalSymbol.Some(Symbol.Equal);
    if symbol == "==" return OptionalSymbol.Some(Symbol.Dequal);
    if symbol == "!" return OptionalSymbol.Some(Symbol.Exclamation);
    if symbol == "!=" return OptionalSymbol.Some(Symbol.NotEqual);
    if symbol == "." return OptionalSymbol.Some(Symbol.Dot);
    if symbol == "+" return OptionalSymbol.Some(Symbol.Plus);
    if symbol == "-" return OptionalSymbol.Some(Symbol.Dash);
    if symbol == "*" return OptionalSymbol.Some(Symbol.Asterisk);
    if symbol == "**" return OptionalSymbol.Some(Symbol.Dasterisk);
    if symbol == "/" return OptionalSymbol.Some(Symbol.Slash);
    if symbol == "?" return OptionalSymbol.Some(Symbol.QuestionMark);
    if symbol == "&" return OptionalSymbol.Some(Symbol.Ampersand);
    if symbol == "&&" return OptionalSymbol.Some(Symbol.Dampersand);
    if symbol == "|" return OptionalSymbol.Some(Symbol.Pipe);
    if symbol == "||" return OptionalSymbol.Some(Symbol.Dpipe);
    if symbol == "<<" return OptionalSymbol.Some(Symbol.Dleft);
    if symbol == ">>" return OptionalSymbol.Some(Symbol.Dright);
    if symbol == ">=" return OptionalSymbol.Some(Symbol.GreaterEqual);
    if symbol == "<=" return OptionalSymbol.Some(Symbol.LesserEqual);
    if symbol == "%" return OptionalSymbol.Some(Symbol.Percent);
    if symbol == "~" return OptionalSymbol.Some(Symbol.Tilde);
    if symbol == "^" return OptionalSymbol.Some(Symbol.Carot);
    return OptionalSymbol.None;
}
func symbol_to_str(symbol: Symbol) -> str {
match symbol in {
case Open return "(";
case Close return ")";
case OpenCurly return "{";
case CloseCurly return "}";
case Comma return ",";
case OpenSquare return "[";
case CloseSquare return "]";
case Colon return ":";
case Left return "<";
case Right return ">";
case Arrow return "->";
case Semicolon return ";";
case Equal return "=";
case Dequal return "==";
case Exclamation return "!";
case NotEqual return "!=";
case Dot return ".";
case Plus return "+";
case Dash return "-";
case Asterisk return "*";
case Dasterisk return "**";
case Slash return "/";
case QuestionMark return "?";
case Ampersand return "&";
case Dampersand return "&&";
case Pipe return "|";
case Dpipe return "||";
case Dleft return "<<";
case Dright return ">>";
case GreaterEqual return ">=";
case LesserEqual return "<=";
case Percent return "%";
case Tilde return "~";
case Carot return "^";
}
assert false, "Invalid symbol";
}
enum TokenContents {
Keyword(Keyword),
Identifier(str),
Number(int),
String(str),
Symbol(Symbol),
Eof
}
func token_contents_to_str(token: TokenContents) -> str {
match token in {
case Keyword(keyword) return "Keyword(%s)" % keyword_to_str(keyword);
case Identifier(string) return "Identifier(%s)" % string;
case Number(number) return "Number(%d)" % number;
case String(string) return "String(\"%s\")" % string;
case Symbol(symbol) return "Symbol('%s')" % symbol_to_str(symbol);
case Eof return "Eof";
}
}
struct Token {
line: int,
col: int,
value: str,
contents: TokenContents
}
func token_to_str(token: Token) -> str {
return token_contents_to_str(token.contents)+"{:"+int_to_str(token.line)+":"+int_to_str(token.col)+"}";
}
enum OptionalToken {
Some(Token),
None
}
func is_some_token(maybe_token: OptionalToken) -> bool {
    match maybe_token in {
        case Some(_) return true;
    }
    return false;
}
struct Lexer {
source: str,
location: int,
line: int,
col: int,
peeked_token: OptionalToken
}
func new_lexer(source: str) -> Lexer {
return Lexer{
source = source,
location = 0,
line = 1,
col = 0,
peeked_token = OptionalToken.None
};
}
func lexer_from_file(path: str) -> Lexer return new_lexer(read(path));
func is_space(char: str) -> bool return char == " " || char == "\t" || char == "\n";
func is_digit(char: str) -> bool return char == "0" || char == "1" || char == "2" || char == "3" || char == "4" || char == "5" || char == "6" || char == "7" || char == "8" || char == "9";
func is_alpha(char: str) -> bool {
return char == "a" || char == "b" || char == "c" || char == "d" || char == "e" || char == "f" || char == "g" || char == "h" || char == "i" || char == "j" || char == "k" || char == "l" || char == "m" || char == "n" || char == "o" || char == "p" || char == "q" || char == "r" || char == "s" || char == "t" || char == "u" || char == "v" || char == "w" || char == "x" || char == "y" || char == "z" || char == "A" || char == "B" || char == "C" || char == "D" || char == "E" || char == "F" || char == "G" || char == "H" || char == "I" || char == "J" || char == "K" || char == "L" || char == "M" || char == "N" || char == "O" || char == "P" || char == "Q" || char == "R" || char == "S" || char == "T" || char == "U" || char == "V" || char == "W" || char == "X" || char == "Y" || char == "Z" || char == "_";
}
func lexer_next_token(lexer: Lexer) -> Token {
    match lexer.peeked_token in {
        case Some(token) do {
            lexer.peeked_token = OptionalToken.None;
            return token;
        }
    }
    while lexer.location < len(lexer.source) && is_space(lexer.source[lexer.location]) do {
        if lexer.source[lexer.location] == "\n" do {
            lexer.line = lexer.line + 1;
            lexer.col = 0;
        }
        lexer.location = lexer.location + 1;
    }
    if lexer.location >= len(lexer.source) return Token{line=lexer.line, col=lexer.col, value="\0", contents=TokenContents.Eof};
    if is_digit(lexer.source[lexer.location]) do {
        number_str: str = "";
        while lexer.location < len(lexer.source) && is_digit(lexer.source[lexer.location]) do {
            number_str = number_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        number: int = str_to_int(number_str);
        return Token{line=lexer.line, col=lexer.col, value=number_str, contents=TokenContents.Number(number)};
    } else if is_alpha(lexer.source[lexer.location]) do {
        word_str: str = "";
        while lexer.location < len(lexer.source) && is_alpha(lexer.source[lexer.location]) do {
            word_str = word_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        match keyword_from_str(word_str) in {
            case Some(keyword) return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Keyword(keyword)};
            case None return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Identifier(word_str)};
        }
        assert false, "Identifier";
    } else if lexer.source[lexer.location] == "\"" do {
        lexer.location = lexer.location + 1;
        string_str: str = "";
        escaping: bool = false;
        while lexer.location < len(lexer.source) && (lexer.source[lexer.location] != "\"" || escaping) do {
            escaping = escaping? false: lexer.source[lexer.location] == "\\";
            string_str = string_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        lexer.location = lexer.location + 1;
        return Token{line=lexer.line, col=lexer.col, value="\""+string_str+"\"", contents=TokenContents.String(string_str)};
    } else if lexer.source[lexer.location] == "|" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "|" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="||", contents=TokenContents.Symbol(Symbol.Dpipe)};
    } else if lexer.source[lexer.location] == "&" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "&" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="&&", contents=TokenContents.Symbol(Symbol.Dampersand)};
    } else if lexer.source[lexer.location] == "*" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "*" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="**", contents=TokenContents.Symbol(Symbol.Dasterisk)};
    } else if lexer.source[lexer.location] == "-" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="->", contents=TokenContents.Symbol(Symbol.Arrow)};
    } else if lexer.source[lexer.location] == ">" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value=">=", contents=TokenContents.Symbol(Symbol.GreaterEqual)};
    } else if lexer.source[lexer.location] == "<" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="<=", contents=TokenContents.Symbol(Symbol.LesserEqual)};
    } else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="==", contents=TokenContents.Symbol(Symbol.Dequal)};
    } else if lexer.source[lexer.location] == "!" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="!=", contents=TokenContents.Symbol(Symbol.NotEqual)};
    } else {
        match symbol_from_str(lexer.source[lexer.location]) in {
            case Some(symbol) do {
                lexer.location = lexer.location + 1;
                return Token{line=lexer.line, col=lexer.col, value=lexer.source[lexer.location-1], contents=TokenContents.Symbol(symbol)};
            }
            case None assert false, "Unimplemented, '%s'" % lexer.source[lexer.location];
        }
        assert false, "Unreachable Symbol";
    }
    assert false, "Unreachable, next_token";
}
func lexer_peek_token(lexer: Lexer) -> Token {
match lexer.peeked_token in {
case Some(token) return token;
case None do {
token: Token = lexer_next_token(lexer);
lexer.peeked_token = OptionalToken.Some(token);
return token;
}
}
assert false, "Unreachable";
}
func lexer_check_token(lexer: Lexer, expected: TokenContents) -> bool {
    return lexer_peek_token(lexer).contents == expected;
}
func lexer_take_token(lexer: Lexer, token: TokenContents) -> OptionalToken {
if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
return OptionalToken.None;
}
func lexer_take_tokens(lexer: Lexer, tokens: TokenContents[]) -> OptionalToken {
for token in tokens do {
if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
}
return OptionalToken.None;
}
func lexer_assert_token(lexer: Lexer, expected: TokenContents) -> Token {
token: Token = lexer_next_token(lexer);
assert token.contents == expected, "Expected %s but got %s!" % (token_contents_to_str(expected), token_to_str(token));
return token;
}
func lexer_check_tokens(lexer: Lexer, tokens: TokenContents[]) -> bool {
for token in tokens if lexer_check_token(lexer, token) return true;
return false;
}
enum TypeExpression {
Tuple(TypeExpression[]),
Union(TypeExpression[]),
List(TypeExpression),
Array(TypeExpression, int),
Name(str),
Specification(TypeExpression, TypeExpression[]),
Function(TypeExpression[], TypeExpression)
}
enum OptionalTypeExpression {
Some(TypeExpression),
None
}
func parse_type_primary(lexer: Lexer) -> TypeExpression {
base_type: TypeExpression;
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]);
types: TypeExpression[] = [parse_type(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) types = types + [parse_type(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
base_type = TypeExpression.Tuple(types);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
assert false, "Unimplemented parse_type_primary array";
} else {
base_type = TypeExpression.Name(parse_identifier(lexer));
}
closing: Symbol;
while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do {
match lexer_next_token(lexer).contents in {
case Symbol(symbol) do {
match symbol in {
case OpenSquare match lexer_peek_token(lexer).contents in {
case Number(number) do {
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_type = TypeExpression.Array(base_type, number);
continue;
}
}
}
match symbol in {
case OpenSquare closing = Symbol.CloseSquare;
case Left closing = Symbol.Right;
case _ assert false, "Unreachable";
}
match symbol in {
case OpenSquare if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closing))) do {
base_type = TypeExpression.List(base_type);
continue;
}
}
generics: TypeExpression[] = [parse_type(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(closing));
match base_type in {
case Specification assert false, "Cannot specify an already specified type";
}
base_type = TypeExpression.Specification(base_type, generics);
}
case _ assert false, "Unreachable";
}
}
return base_type;
}
func parse_type(lexer: Lexer) -> TypeExpression {
base_type: TypeExpression = parse_type_primary(lexer);
if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return base_type;
return_type: TypeExpression = parse_type(lexer);
match base_type in {
case Tuple(type_expressions) return TypeExpression.Function(type_expressions, return_type);
}
return TypeExpression.Function([base_type], return_type);
}
struct TypeDeclaration {
name: str,
type_: TypeExpression
}
func parse_type_declaration(lexer: Lexer) -> TypeDeclaration {
entry_name: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
entry_type: TypeExpression = parse_type(lexer);
return TypeDeclaration{name=entry_name, type_=entry_type};
}
enum EnumEntry {
Const(str),
Tuple(str, TypeExpression[]),
Struct(str, TypeDeclaration[])
}
func parse_enum_entry(lexer: Lexer) -> EnumEntry {
entry_name: str = parse_identifier(lexer);
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
entry_types: TypeExpression[] = [parse_type(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) entry_types = entry_types + [parse_type(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
return EnumEntry.Tuple(entry_name, entry_types);
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return EnumEntry.Struct(entry_name, []);
assert false, "Unimplemented parse_enum_entry";
}
return EnumEntry.Const(entry_name);
}
enum Expression {
    FunctionCall(Expression, Expression[]),
    Variable(str),
    ArrayAccess(Expression, Expression),
    Array(Expression[]),
    FieldAccess(Expression, str),
    Number(int),
    String(str),
    Tuple(Expression[]),
    StructInstantiation(Expression, (str, Expression)[]),
    LoopComrehension(Expression, str, Expression),
    Return(Expression),
    Ternary(Expression, Expression, Expression),
    Or(Expression, Expression),
    And(Expression, Expression),
    Bor(Expression, Expression),
    Bxor(Expression, Expression),
    Band(Expression, Expression),
    Equal(Expression, Expression),
    NotEqual(Expression, Expression),
    LessThan(Expression, Expression),
    GreaterThan(Expression, Expression),
    LessThanOrEqual(Expression, Expression),
    GreaterThanOrEqual(Expression, Expression),
    ShiftLeft(Expression, Expression),
    ShiftRight(Expression, Expression),
    Addition(Expression, Expression),
    Subtract(Expression, Expression),
    Multiplication(Expression, Expression),
    Division(Expression, Expression),
    Modulo(Expression, Expression),
    Bnot(Expression),
    Not(Expression),
    UnaryPlus(Expression),
    UnaryMinus(Expression),
    Lambda(TypeDeclaration[], Expression)
}
enum OptionalExpression {
Some(Expression),
None
}
func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
parameter: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
return (parameter, parse_expression(lexer));
}
func parse_primary(lexer: Lexer) -> Expression {
base_expression: Expression;
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.Tuple([]);
else {
elements: Expression[] = [parse_expression(lexer)];
singleton: bool = false;
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
singleton = true;
break;
}
elements = elements + [parse_expression(lexer)];
}
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
base_expression = singleton || len(elements) > 1? Expression.Tuple(elements): elements[0];
}
} else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare))) base_expression = Expression.Array([]);
else {
expressions: Expression[] = [parse_expression(lexer)];
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
variable: str = parse_identifier(lexer);
lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
expression: Expression = parse_expression(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_expression = Expression.LoopComrehension(expressions[0], variable, expression);
} else {
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) expressions = expressions + [parse_expression(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_expression = Expression.Array(expressions);
}
}
} else {
match lexer_next_token(lexer).contents in {
case String(string) base_expression = Expression.String(string);
case Number(number) base_expression = Expression.Number(number);
case Identifier(string) base_expression = Expression.Variable(string);
case _token assert false, "Expected identifier, but got %s!" % token_to_str(_token);
}
}
while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
match lexer_next_token(lexer).contents in {
case Symbol(symbol) match symbol in {
case Dot base_expression = Expression.FieldAccess(base_expression, parse_identifier(lexer));
case Open do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) base_expression = Expression.FunctionCall(base_expression, []);
else {
arguments: Expression[] = [parse_expression(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) arguments = arguments + [parse_expression(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
base_expression = Expression.FunctionCall(base_expression, arguments);
}
}
case OpenSquare do {
index: Expression = parse_expression(lexer);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
base_expression = Expression.ArrayAccess(base_expression, index);
}
case OpenCurly do {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) base_expression = Expression.StructInstantiation(base_expression, []);
else {
struct_arguments: (str, Expression)[] = [parse_struct_argument(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_arguments = struct_arguments + [parse_struct_argument(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
base_expression = Expression.StructInstantiation(base_expression, struct_arguments);
}
}
case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(symbol);
}
case _ assert false, "Unimplemented parse_primary %s" % token_to_str(lexer_next_token(lexer));
}
}
return base_expression;
}
func parse_unary(lexer: Lexer) -> Expression {
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde))) return Expression.Bnot(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation))) return Expression.Not(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus))) return Expression.UnaryPlus(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash))) return Expression.UnaryMinus(parse_unary(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_unary(lexer));
return parse_primary(lexer);
}
precedences: (Symbol, (Expression, Expression) -> Expression)[][] = [
[(Symbol.Dpipe, Expression.Or)],
[(Symbol.Dampersand, Expression.And)],
[(Symbol.Pipe, Expression.Bor)],
[(Symbol.Carot, Expression.Bxor)],
[(Symbol.Ampersand, Expression.Band)],
[(Symbol.Dequal, Expression.Equal), (Symbol.NotEqual, Expression.NotEqual)],
[(Symbol.Left, Expression.LessThan), (Symbol.Right, Expression.GreaterThan), (Symbol.LesserEqual, Expression.LessThanOrEqual), (Symbol.GreaterEqual, Expression.GreaterThanOrEqual)],
[(Symbol.Dleft, Expression.ShiftLeft), (Symbol.Dright, Expression.ShiftRight)],
[(Symbol.Plus, Expression.Addition), (Symbol.Dash, Expression.Subtract)],
[(Symbol.Asterisk, Expression.Multiplication), (Symbol.Slash, Expression.Division), (Symbol.Percent, Expression.Modulo)]
];
func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
if level >= len(precedences) return parse_unary(lexer);
left: Expression = parse_expression_at_level(lexer, level+1);
tokens: TokenContents[] = [TokenContents.Symbol(symbol_expressor[0]) for symbol_expressor in precedences[level]];
expressor: (Expression, Expression) -> Expression;
while lexer_check_tokens(lexer, tokens) do {
match lexer_next_token(lexer).contents in {
case Symbol(symbol) do {
for symbol_expressor in precedences[level] if symbol_expressor[0] == symbol expressor = symbol_expressor[1];
left = expressor(left, parse_expression_at_level(lexer, level+1));
}
case _ assert false, "Unreachable";
}
}
return left;
}
func parse_ternary(lexer: Lexer) -> Expression {
expression: Expression = parse_expression_at_level(lexer, 0);
if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark))) return expression;
if_true: Expression = parse_expression_at_level(lexer, 0);
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
if_false: Expression = parse_ternary(lexer);
return Expression.Ternary(expression, if_true, if_false);
}
func parse_expression(lexer: Lexer) -> Expression {
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_expression(lexer));
if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
parameters: TypeDeclaration[];
if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow))) parameters = [];
else do {
parameters = [parse_type_declaration(lexer)];
while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) parameters = parameters + [parse_type_declaration(lexer)];
lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
}
return Expression.Lambda(parameters, parse_expression(lexer));
}
return parse_ternary(lexer);
}
enum Statement {
Statements(Statement[]),
EnumDefinition(str, EnumEntry[]),
StructDefinition(str, TypeDeclaration[]),
FunctionDefinition(str, TypeDeclaration[], OptionalTypeExpression, Statement),
Expression(Expression),
Assignment(Expression, Expression, OptionalTypeExpression),
TypeDeclaration(TypeDeclaration),
If(Expression, Statement, OptionalStatement),
While(Expression, Statement),
DoWhile(Statement, OptionalExpression),
Break,
Continue,
Match(Expression, (Expression, Statement)[]),
Assert(Expression, OptionalExpression),
ForLoop(str, Expression, Statement)
}
func statement_to_str(statement: Statement) -> str {
match statement in {
case EnumDefinition(name, entries) return "Enum %s" % name;
}
assert false, "Unimplemented statement_to_str";
}
enum OptionalStatement {
Some(Statement),
None
}
func parse_identifier(lexer: Lexer) -> str {
identifier_token: Token = lexer_next_token(lexer);
match identifier_token.contents in {
case Identifier(identifier) return identifier;
case _ assert false, "Expected identifier, but got %s!" % token_to_str(identifier_token);
}
}
func parse_number(lexer: Lexer) -> int {
    number_token: Token = lexer_next_token(lexer);
    match number_token.contents in {
        case Number(number) return number;
        case _ assert false, "Expected number, but got %s!" % token_to_str(number_token);
    }
}
func parse_string(lexer: Lexer) -> str {
    string_token: Token = lexer_next_token(lexer);
    match string_token.contents in {
        case String(string) return string;
        case _ assert false, "Expected string, but got %s!" % token_to_str(string_token);
    }
}
func is_valid_target(expression: Expression) -> bool {
match expression in {
case FieldAccess(subexpression, _) return is_valid_target(subexpression);
case Variable(_) return true;
case _ assert false, "Unimplemented is_valid_target %s" % expression;
}
}
func parse_statement(lexer: Lexer) -> Statement {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
        enum_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.EnumDefinition(enum_name, []);
        enum_entries: EnumEntry[] = [parse_enum_entry(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) enum_entries = enum_entries + [parse_enum_entry(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.EnumDefinition(enum_name, enum_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
        struct_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.StructDefinition(struct_name, []);
        struct_entries: TypeDeclaration[] = [parse_type_declaration(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) struct_entries = struct_entries + [parse_type_declaration(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.StructDefinition(struct_name, struct_entries);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
        function_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
        function_arguments: TypeDeclaration[] = [];
        if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
            function_arguments = function_arguments + [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) function_arguments = function_arguments + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        }
        function_return_type: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) function_return_type = OptionalTypeExpression.Some(parse_type(lexer));
        function_body: Statement = parse_statement(lexer);
        return Statement.FunctionDefinition(function_name, function_arguments, function_return_type, function_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
        return Statement.If(parse_expression(lexer), parse_statement(lexer), is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))? OptionalStatement.Some(parse_statement(lexer)): OptionalStatement.None);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
        value: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        cases: (Expression, Statement)[] = [];
        while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case))) cases = cases + [(parse_expression(lexer), parse_statement(lexer))];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.Match(value, cases);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
        condition: Expression = parse_expression(lexer);
        message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))? OptionalExpression.Some(parse_expression(lexer)): OptionalExpression.None;
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Assert(condition, message);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
        body: Statement = parse_statement(lexer);
        condition: OptionalExpression = OptionalExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
            condition = OptionalExpression.Some(parse_expression(lexer));
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        }
        return Statement.DoWhile(body, condition);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
        return Statement.While(parse_expression(lexer), parse_statement(lexer));
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
        variable: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        expression: Expression = parse_expression(lexer);
        body: Statement = parse_statement(lexer);
        return Statement.ForLoop(variable, expression, body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Continue;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Break;
    }
    else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        statements: Statement[] = [];
        while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) statements = statements + [parse_statement(lexer)];
        return Statement.Statements(statements);
    } else {
        expression: Expression = parse_expression(lexer);
        type_: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
            match expression in {
                case Variable(_) type_ = OptionalTypeExpression.Some(parse_type(lexer));
                case _ assert false, "Invalid target";
            }
        }
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
            assert is_valid_target(expression), "Invalid target!";
            right_expression: Expression = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
            return Statement.Assignment(expression, right_expression, type_);
        }
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        match expression in {
            case Variable(name) match type_ in {
                case Some(type_expression) return Statement.TypeDeclaration(TypeDeclaration{name=name, type_=type_expression});
            }
        }
        return Statement.Expression(expression);
    }
}
print("Parsing...\n");
lexer: Lexer = lexer_from_file("test.pyc");
statements: Statement[] = [];
while !is_some_token(lexer_take_token(lexer, TokenContents.Eof)) statements = statements + [parse_statement(lexer)];

3
ppp.ppp Normal file
View File

@ -0,0 +1,3 @@
import "ppp_interpreter.ppp";
interpret_file("ppp.ppp", {});

7
ppp.py Normal file
View File

@ -0,0 +1,7 @@
from ppp_interpreter import interpret_file
import sys

# Entry point: interpret the ppp source file named by the first CLI argument.
try:
    interpret_file(sys.argv[1], {})
except RecursionError:
    # The interpreter recurses once per nested statement/expression, so deeply
    # nested programs overflow Python's call stack; report briefly instead of
    # dumping a huge traceback. (The previous `as e` binding was unused.)
    print("Recursion")

95
ppp_ast.ppp Normal file
View File

@ -0,0 +1,95 @@
enum TypeExpression {
Tuple(TypeExpression[]),
Union(TypeExpression[]),
List(TypeExpression),
Array(TypeExpression, int),
Name(str),
Specification(TypeExpression, TypeExpression[]),
Function(TypeExpression[], TypeExpression)
}
enum OptionalTypeExpression {
Some(TypeExpression),
None
}
struct TypeDeclaration {
name: str,
type_: TypeExpression
}
struct EnumEntry {
name: str,
types: TypeExpression[]
}
enum Expression {
FunctionCall(Expression, Expression[]),
Variable(str),
ArrayAccess(Expression, Expression),
Array(Expression[]),
FieldAccess(Expression, str),
Number(int),
String(str),
Tuple(Expression[]),
StructInstantiation(Expression, (str, Expression)[]),
LoopComrehension(Expression, str, Expression),
Dictionary((Expression, Expression)[]),
DictComprehension((Expression, Expression), str, Expression),
Return(Expression),
Ternary(Expression, Expression, Expression),
Or(Expression, Expression),
And(Expression, Expression),
Bor(Expression, Expression),
Bxor(Expression, Expression),
Band(Expression, Expression),
Equal(Expression, Expression),
NotEqual(Expression, Expression),
LessThan(Expression, Expression),
GreaterThan(Expression, Expression),
LessThanOrEqual(Expression, Expression),
GreaterThanOrEqual(Expression, Expression),
ShiftLeft(Expression, Expression),
ShiftRight(Expression, Expression),
Addition(Expression, Expression),
Subtract(Expression, Expression),
Multiplication(Expression, Expression),
Division(Expression, Expression),
Modulo(Expression, Expression),
Bnot(Expression),
Not(Expression),
UnaryPlus(Expression),
UnaryMinus(Expression)
}
enum OptionalExpression {
Some(Expression),
None
}
enum Statement {
Statements(Statement[]),
EnumDefinition(str, EnumEntry[]),
StructDefinition(str, TypeDeclaration[]),
FunctionDefinition(str, TypeDeclaration[], OptionalTypeExpression, Statement),
Expression(Expression),
Assignment(Expression, Expression, OptionalTypeExpression),
TypeDeclaration(TypeDeclaration),
If(Expression, Statement, OptionalStatement),
While(Expression, Statement),
DoWhile(Statement, OptionalExpression),
Break,
Continue,
Match(Expression, (Expression, Statement)[]),
Assert(Expression, OptionalExpression),
ForLoop(str, Expression, Statement),
Import(Expression),
TypeDefinition(str, TypeExpression)
}
enum OptionalStatement {
Some(Statement),
None
}

545
ppp_ast.py Normal file
View File

@ -0,0 +1,545 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union
### Types ###
class TypeExpression(ABC):
    """Abstract base for parsed ppp type expressions (e.g. names, lists, tuples)."""
    @abstractmethod
    def represent(self) -> str: ...  # render back to ppp source syntax
@dataclass
class TupleTypeExpr(TypeExpression):
types: List[TypeExpression]
def represent(self) -> str:
assert False, ("Unimplemented")
@dataclass
class UnionTypeExpr(TypeExpression):
types: List[TypeExpression]
def represent(self) -> str:
assert False, ("Unimplemented")
@dataclass
class ListTypeExpr(TypeExpression):
type: TypeExpression
def represent(self) -> str:
assert False, ("Unimplemented")
@dataclass
class ArrayTypeExpr(TypeExpression):
type: TypeExpression
number: int
def represent(self) -> str:
assert False, ("Unimplemented")
@dataclass
class TypeName(TypeExpression):
name: str
def represent(self) -> str:
assert False, ("Unimplemented")
@dataclass
class TypeSpecification(TypeExpression):
type: TypeExpression
types: List[TypeExpression]
def represent(self) -> str:
assert False, ("Unimplemented")
@dataclass
class FunctionTypeExpr(TypeExpression):
arguments: List[TypeExpression]
return_type: TypeExpression
def represent(self) -> str:
assert False, ("Unimplemented")
### Statements ###
class Statement:
pass
@dataclass
class Statements(Statement):
statements: List[Statement]
### Enums + Struct ###
@dataclass
class EnumEntry:
name: str
types: List[TypeExpression]
@dataclass
class EnumDefinition(Statement):
name: str
entries: List[EnumEntry]
@dataclass
class TypeDeclaration:
name: str
type: TypeExpression
@dataclass
class StructDefinition(Statement):
name: str
entries: List[TypeDeclaration]
### Function ###
@dataclass
class FunctionDefinition(Statement):
    """func <name>(<arguments>) [-> <return_type>] <body>"""
    name: str
    # Use typing.List for consistency with every other dataclass in this file
    # (the field previously used the builtin `list`, requiring Python 3.9+).
    arguments: List[TypeDeclaration]
    return_type: Optional[TypeExpression]  # None when no `->` clause was written
    body: Statement
### Expressions ###
class Expression(ABC):
    """Abstract base for ppp expression AST nodes."""
    @abstractmethod
    def precedence(self) -> int: ...
    @abstractmethod
    def represent(self) -> str: ...
    def wrap(self, other: 'Expression') -> str:
        # Parenthesize a sub-expression that binds less tightly than self.
        rendered = other.represent()
        if self.precedence() > other.precedence():
            return '(' + rendered + ')'
        return rendered
@dataclass
class FunctionCall(Expression):
function: Expression
arguments: List[Expression]
def represent(self) -> str:
return self.wrap(self.function)+"("+', '.join([argument.represent() for argument in self.arguments])+")"
def precedence(self) -> int: return 13
@dataclass
class Variable(Expression):
name: str
def represent(self) -> str:
return self.name
def precedence(self) -> int: return 13
@dataclass
class ArrayAccess(Expression):
array: Expression
index: Expression
def represent(self) -> str:
return self.wrap(self.array)+"["+self.index.represent()+"]"
def precedence(self) -> int: return 13
@dataclass
class Array(Expression):
    """A list literal, e.g. [a, b, c]."""
    array: List[Expression]
    def represent(self) -> str:
        # Render elements via represent() so output matches ppp source syntax;
        # the previous map(str, ...) printed the dataclass repr instead.
        return "["+', '.join([element.represent() for element in self.array])+"]"
    def precedence(self) -> int: return 13
@dataclass
class FieldAccess(Expression):
expression: Expression
field: str
def represent(self) -> str:
return self.wrap(self.expression)+"."+self.field
def precedence(self) -> int: return 13
@dataclass
class Number(Expression):
number: int
def represent(self) -> str:
return str(self.number)
def precedence(self) -> int: return 13
@dataclass
class String(Expression):
string: str
def represent(self) -> str:
return repr(self.string)
def precedence(self) -> int: return 13
@dataclass
class TupleExpr(Expression):
    """A tuple literal, e.g. (a, b)."""
    elements: List[Expression]
    def represent(self) -> str:
        # ppp tuples are written with parentheses only; the previous
        # f"([{...}])" form wrapped them in spurious square brackets.
        return "("+', '.join([element.represent() for element in self.elements])+")"
    def precedence(self) -> int: return 13
@dataclass
class StructInstantiation(Expression):
struct: Expression
arguments: List[Tuple[str, Expression]]
def represent(self) -> str:
assert False, ("Unimplemented")
def precedence(self) -> int: return 13
@dataclass
class LoopComprehension(Expression):
body: Expression
variable: str # TODO: Pattern matching
array: Expression
def represent(self) -> str:
assert False, ("Unimplemented")
def precedence(self) -> int: return 13
@dataclass
class DictionaryExpr(Expression):
dict: List[Tuple[Expression, Expression]]
def represent(self) -> str: assert False
def precedence(self) -> int: return 13
@dataclass
class DictComprehension(Expression):
body: Tuple[Expression, Expression]
variable: str # TODO: Pattern matching
array: Expression
def represent(self) -> str:
assert False, ("Unimplemented")
def precedence(self) -> int: return 13
@dataclass
class Return(Expression):
expression: Expression
def represent(self) -> str:
# TODO: This will have to be improved
return "return "+self.wrap(self.expression)
def precedence(self) -> int: return 0
@dataclass
class Lambda(Expression):
parameters: List[TypeDeclaration]
expression: Expression
def represent(self) -> str:
assert False, ("Unimplemented")
def precedence(self) -> int: return 0
@dataclass
class Ternary(Expression):
condition: Expression
if_true: Expression
if_false: Expression
def represent(self) -> str:
return self.wrap(self.if_true)+" if "+self.wrap(self.condition)+" else "+self.wrap(self.if_false)
def precedence(self) -> int: return 1
@dataclass
class Or(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" or "+self.wrap(self.rhs)
def precedence(self) -> int: return 2
@dataclass
class And(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" and "+self.wrap(self.rhs)
def precedence(self) -> int: return 3
@dataclass
class Bor(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" | "+self.wrap(self.rhs)
def precedence(self) -> int: return 4
@dataclass
class Bxor(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" ^ "+self.wrap(self.rhs)
def precedence(self) -> int: return 5
@dataclass
class Band(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" & "+self.wrap(self.rhs)
def precedence(self) -> int: return 6
@dataclass
class Equal(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" == "+self.wrap(self.rhs)
def precedence(self) -> int: return 7
@dataclass
class NotEqual(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" != "+self.wrap(self.rhs)
def precedence(self) -> int: return 7
@dataclass
class LessThan(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" < "+self.wrap(self.rhs)
def precedence(self) -> int: return 8
@dataclass
class GreaterThan(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" > "+self.wrap(self.rhs)
def precedence(self) -> int: return 8
@dataclass
class LessThanOrEqual(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" <= "+self.wrap(self.rhs)
def precedence(self) -> int: return 8
@dataclass
class GreaterThanOrEqual(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" >= "+self.wrap(self.rhs)
def precedence(self) -> int: return 8
@dataclass
class ShiftLeft(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" << "+self.wrap(self.rhs)
def precedence(self) -> int: return 9
@dataclass
class ShiftRight(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" >> "+self.wrap(self.rhs)
def precedence(self) -> int: return 9
@dataclass
class Addition(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" + "+self.wrap(self.rhs)
def precedence(self) -> int: return 10
@dataclass
class Subtract(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" - "+self.wrap(self.rhs)
def precedence(self) -> int: return 10
@dataclass
class Multiplication(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" * "+self.wrap(self.rhs)
def precedence(self) -> int: return 11
@dataclass
class Division(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" / "+self.wrap(self.rhs)
def precedence(self) -> int: return 11
@dataclass
class Modulo(Expression):
lhs: Expression
rhs: Expression
def represent(self) -> str:
return self.wrap(self.lhs)+" % "+self.wrap(self.rhs)
def precedence(self) -> int: return 11
@dataclass
class Bnot(Expression):
expression: Expression
def represent(self) -> str:
return "~"+self.wrap(self.expression)
def precedence(self) -> int: return 12
@dataclass
class Not(Expression):
expression: Expression
def represent(self) -> str:
return "!"+self.wrap(self.expression)
def precedence(self) -> int: return 12
@dataclass
class UnaryPlus(Expression):
expression: Expression
def represent(self) -> str:
return "+"+self.wrap(self.expression)
def precedence(self) -> int: return 12
@dataclass
class UnaryMinus(Expression):
expression: Expression
def represent(self) -> str:
return "-"+self.wrap(self.expression)
def precedence(self) -> int: return 12
@dataclass
class ExpressionStatement(Statement):
expression: Expression
### Assignment + Declaration ###
@dataclass
class Assignment(Statement):
lhs: Expression
rhs: Expression
type: Optional[TypeExpression] = None
@dataclass
class TypeDeclarationStatement(Statement):
type_declaration: TypeDeclaration
### Control flow ###
@dataclass
class IfStatement(Statement):
condition: Expression
body: Statement
else_body: Optional[Statement]
@dataclass
class WhileStatement(Statement):
condition: Expression
body: Statement
@dataclass
class DoWhileStatement(Statement):
body: Statement
condition: Optional[Expression]
# TODO: Maybe do something similar to return with these two?
@dataclass
class BreakStatement(Statement):
pass
@dataclass
class ContinueStatement(Statement):
pass
@dataclass
class MatchStatement(Statement):
value: Expression
cases: List[Tuple[Expression, Statement]]
@dataclass
class AssertStatement(Statement):
condition: Expression
message: Optional[Expression]
@dataclass
class ForLoop(Statement):
variable: str # TODO allow for pattern matching
array: Expression
body: Statement
@dataclass
class Import(Statement):
file: Expression
@dataclass
class TypeDefinition(Statement):
name: str
expression: TypeExpression

288
ppp_interpreter.ppp Normal file
View File

@ -0,0 +1,288 @@
import "ppp_tokens.ppp";
import "ppp_lexer.ppp";
import "ppp_ast.ppp";
import "ppp_parser.ppp";
import "ppp_types.ppp";
import "ppp_object.ppp";
import "ppp_stdlib.ppp";
enum VariableState {
Declared(Type, Object),
Undeclared(Type),
Constant(Type, Object)
}
func declared_from_obj(obj: Object) -> VariableState return VariableState.Declared(object_get_type(obj), obj);
type Module = dict[str, VariableState];
struct Program {
modules: dict[str, Module],
contexts: dict[str, VariableState][]
}
func program_exists(program: Program, name: str) -> bool {
    for context in program.contexts do {
        for existing in context do {
            if existing == name return true;
        }
    }
    return false;
}
func program_access_variable(program: Program, name: str) -> Object {
    i: int = len(program.contexts) - 1;
    while i >= 0 do {
        context: dict[str, VariableState] = program.contexts[i];
        for name_ in context do {
            if name_ == name do {
                value: VariableState = context[name];
                match value in {
                    case Declared(_, value) return value;
                    case Constant(_, value) return value;
                    case Undeclared(_) assert false, "'%s' is not defined!" % name;
                    case _ assert false, "Unimplemented program_access_variable %s" % value;
                }
            }
        }
        i = i - 1;
    }
    assert false, "'%s' is not defined!" % name;
}
func program_declare_variable(program: Program, name: str, type_: Type) {
    last: int = len(program.contexts) - 1;
    for existing in program.contexts[last] assert existing != name, "'%s' has already been declared!" % name;
    program.contexts[last][name] = VariableState.Undeclared(type_);
}
func program_assign_variable(program: Program, name: str, value: Object) {
    i: int = len(program.contexts) - 1;
    while i >= 0 do {
        context: dict[str, VariableState] = program.contexts[i];
        for name_ in context do {
            if name_ == name do {
                variable_type: Type;
                match context[name] in {
                    case Undeclared(type_) variable_type = type_;
                    case _ assert false, "Unimplemented program_assign_variable %s" % context[name];
                }
                assert type_is_subtype_of(object_get_type(value), variable_type), "In the assignment of '%s', expected value of type '%s', but got a value of type '%s'!" % (name, type_represent(variable_type), type_represent(object_get_type(value)));
                context[name] = VariableState.Declared(variable_type, value);
                return none;
            }
        }
        i = i - 1;
    }
    assert false, "'%s' doesn't exist!" % name;
}
func program_declare_and_assign_variable(program: Program, name: str, value: Object) {
program_declare_variable(program, name, object_get_type(value));
program_assign_variable(program, name, value);
}
func calculate_expression(expression: Expression, program: Program) -> Object {
match expression in {
case String(string) return Object.Str(string);
case Array(array_) do {
if len(array_) == 0 return Object.List(Type.List(Type.Variable("")), []);
elements_type: Type;
array_elements: Object[] = [];
for i in range(0, len(array_)) do {
element: Object = calculate_expression(array_[i], program);
if i == 0 elements_type = object_get_type(element);
else assert type_is_subtype_of(object_get_type(element), elements_type), "Array element invalid type";
array_elements = array_elements + [element];
}
assert false, "array %s" % array_elements;
}
case Dictionary(dict_) do {
if len(dict_) == 0 return Object.Dictionary(Type.Dictionary(Type.Variable(""), Type.Variable("")), {});
key_type: Type;
value_type: Type;
dict: dict[Object, Object] = {};
for i in range(0, len(dict_)) do {
key: Object = calculate_expression(dict_[i][0], program);
value: Object = calculate_expression(dict_[i][1], program);
if i == 0 do {
key_type = object_get_type(key);
value_type = object_get_type(value);
} else {
assert type_is_subtype_of(object_get_type(key), key_type), "Dict element invalid key type";
assert type_is_subtype_of(object_get_type(value), value_type), "Dict element invalid value type";
}
dict[key] = value;
}
assert false, "dict %s" % dict;
}
case FieldAccess(expression_, field) do {
value: Object = calculate_expression(expression_, program);
match value in {
case Type(type_) match type_ in {
case Enum(name, members, _) do {
for member in members do {
if member == field do {
if len(members[field]) == 0 return Object.EnumValue(type_, field, []);
func return_member(name: str, parameters: (str, Type)[], return_type: Type, statement: Statement, args: Object[]) -> Object {
match return_type in {
case Enum(_, _, _) do {}
case _ assert false, "Unreachable";
}
assert len(args) == len(parameters), "%s.%s expected %s arguments but got %s!" % (type_represent(type_), field, int_to_str(len(parameters)), int_to_str(len(args)));
for i in range(0, len(args)) assert type_is_subtype_of(object_get_type(args[i]), parameters[i][1]);
return Object.EnumValue(return_type, name, args);
}
return Object.Function(Type.Function(members[field], type_), (field, [("", member_type) for member_type in members[field]], type_, Statement.Statements([]), return_member));
}
}
assert false, "g";
}
case _ assert false, "Unimplemented calculate_expression field access type %s" % type_represent(type_);
}
case _ assert false, "Unimplemented calculate_expression field access %s" % value;
}
}
case Variable(name) return program_access_variable(program, name);
case _ assert false, "Unimplemented calculate_expression %s" % expression;
}
}
func calculate_type_expression(expression: TypeExpression, program: Program, must_resolve: bool) -> Type {
match expression in {
case Name(name) do {
if !program_exists(program, name) && !must_resolve return Type.Variable(name);
type_obj: Object = program_access_variable(program, name);
match type_obj in {
case Type(type_) return type_;
case _ assert false, "Unimplemented %s" % type_obj;
}
}
case List(type_) return Type.List(calculate_type_expression(type_, program, must_resolve));
case Tuple(types) return Type.Tuple([calculate_type_expression(type_, program, must_resolve) for type_ in types]);
case _ assert false, "Unimplemented calculate_type_expression %s" % expression;
}
}
func update_types(type_: Type, program: Program) {
type_name: str;
match type_ in {
case Enum(name, _, _) type_name = name;
case Struct(name, _, _) type_name = name;
case _ assert false, "Unimplemented update_types %s" % type_represent(type_);
}
for context in program.contexts do {
for variable_ in context do {
match context[variable_] in {
case Declared(variable_type, value) match value in {
case Type(variable_type_) do {
assert variable_type == Type.Type;
type_fill(variable_type_, {type_name: type_}, []);
}
}
case Undeclared(_) do {}
case _ assert false, "Unimplemented update_types %s" % context[variable_];
}
}
}
}
enum Result {
Return(Object),
Continue,
Break,
Nothing
}
func interpret_statements(statements: Statement[], program: Program) -> Result {
for statement in statements do {
match statement in {
case Import(file_) do {
file_path: Object = calculate_expression(file_, program);
match file_path in {
case Str(file_path_str) do {
found: bool = false;
module: Module;
for module_ in program.modules do {
if module_ == file_path_str do {
found = true;
module = program.modules[module_];
}
}
if !found do {
print("Importing %s\n" % file_path_str);
module = interpret_file(file_path_str, program.modules);
}
for variable in module do program.contexts[0][variable] = module[variable];
if !found program.modules[file_path_str] = module;
}
case _ assert false, "Unimplemented interpret_statement import %s" % file_path;
}
}
case EnumDefinition(name, entries) do {
enum_type: Type = Type.Enum(name, {entry.name: [calculate_type_expression(type_, program, false) for type_ in entry.types] for entry in entries}, []);
program_declare_and_assign_variable(program, name, Object.Type(enum_type));
update_types(enum_type, program);
}
case FunctionDefinition(name, arguments_, return_type_, body) do {
func run_function(name: str, arguments: (str, Type)[], return_type: Type, body: Statement, args: Object[]) -> Object {
assert false, "run_function";
}
arguments: (str, Type)[] = [(argument.name, calculate_type_expression(argument.type_, program, true)) for argument in arguments_];
return_type: Type;
match return_type_ in {
case Some(type_) return_type = calculate_type_expression(type_, program, true);
case None return_type = Type.Void;
}
function_type: Type = Type.Function([argument[1] for argument in arguments], return_type);
object: Object = Object.Function(function_type, (name, arguments, return_type, body, run_function));
program_declare_and_assign_variable(program, name, object);
}
case StructDefinition(name, entries) do {
struct_type: Type = Type.Struct(name, {entry.name: calculate_type_expression(entry.type_, program, false) for entry in entries}, []);
program_declare_and_assign_variable(program, name, Object.Type(struct_type));
update_types(struct_type, program);
}
case Assignment(lhs, rhs, type_) do {
assert is_valid_target(lhs);
match lhs in {
case Variable(name) do {
value: Object = calculate_expression(rhs, program);
match type_ in {
case _ assert false, "uuuu %s" % type_;
}
}
case _ assert false, "Unimplemented interpret_statement assignment %s" % lhs;
}
}
case _ assert false, "Unimplemented interpret_statement %s" % statement;
}
}
return Result.Nothing;
}
func interpret_file(file_path: str, modules: dict[str, Module]) -> Module {
print("Parsing %s...\n" % file_path);
lexer: Lexer = lexer_from_file(file_path);
statements: Statement[] = [];
while !is_some_token(lexer_take_token(lexer, TokenContents.Eof)) statements = statements + [parse_statement(lexer)];
new_variables: dict[str, VariableState] = {};
for variable in variables new_variables[variable] = declared_from_obj(variables[variable]);
program: Program = Program{modules=modules, contexts=[new_variables, {}]};
print("Interpreting %s...\n" % file_path);
return_value: Result = interpret_statements(statements, program);
assert len(program.contexts) == 2;
match return_value in {
case Nothing do {}
case Return(_) assert false, "Cannot return from outside a function!";
case Continue assert false, "Cannot continue from outside a loop!";
case Break assert false, "Cannot break from outside a loop!";
case _ assert false, "Unimplemented interpret_file return_value";
}
return program.contexts[1];
}

679
ppp_interpreter.py Normal file
View File

@ -0,0 +1,679 @@
from dataclasses import dataclass
from typing import Dict, List as List_, Optional, Tuple, Union
from ppp_ast import *
from ppp_lexer import Lexer
from ppp_object import Bool, Dictionary, EnumValue, Function, Hashable, Int, Object, Str, Struct, Tuple as TupleObject, List as ListObject, Return as ReturnObject, TypeObject, Dictionary as DictionaryObject, Void
from ppp_parser import is_valid_target, parse_statement
from ppp_tokens import EofToken
from ppp_stdlib import variables
from ppp_types import DictionaryType, EnumType, FunctionType, GenericType, Int as IntType, ListType, ReturnType, Str as StrType, StructType, TupleType, Type, TypeType, UnionType, VariableType, Void as VoidType
@dataclass
class Declared:
    """A variable that has been declared and holds a value."""
    type: Type    # declared (static) type; assignments are checked against it
    value: Object
    @staticmethod
    def from_obj(obj: Object) -> 'Declared':
        # Infer the declared type from the value itself.
        return Declared(obj.get_type(), obj)
@dataclass
class Undeclared:
    """A variable whose type has been declared but which has no value yet."""
    type: Type
@dataclass
class Constant:
    """An immutable binding with a fixed value (e.g. stdlib names)."""
    type: Type
    value: Object
    @staticmethod
    def from_obj(obj: Object) -> 'Constant':
        # Fixed copy-paste bug: this previously returned Declared(...), so
        # values built via Constant.from_obj were never actually Constant.
        return Constant(obj.get_type(), obj)
VariableState = Union[Declared, Undeclared, Constant]
Module = Dict[str, VariableState]
@dataclass
class ProgramState:
    """Interpreter state: loaded modules plus a stack of variable scopes."""
    modules: Dict[str, Module] # TODO: What is the type of module?
    contexts: List_[Dict[str, VariableState]]  # innermost scope is contexts[-1]
    def push_context(self, variables: Dict[str, VariableState]) -> None: self.contexts.append(variables)
    def pop_context(self) -> None: self.contexts.pop()
    def declare_variable(self, name: str, type: Type) -> None:
        # Declaring only reserves the name in the current scope; the value
        # arrives later via assign_variable.
        assert not (name in self.contexts[-1]), f"'{name}' has already been declared!"
        self.contexts[-1][name] = Undeclared(type)
    def assign_variable(self, name: str, value: Object) -> None:
        # Search scopes innermost-first; assign in the first scope holding the
        # name, after checking the value against the declared type.
        # NOTE(review): this also overwrites Constant bindings without
        # complaint — confirm whether constants should be rejected here.
        for context in self.contexts[::-1]:
            if name in context:
                assert value.get_type().is_subtype_of(context[name].type), f"In the assignment of '{name}', expected value of type {context[name].type.represent()}, but got a value of type {value.get_type().represent()}!"
                context[name] = Declared(context[name].type, value)
                return
        assert False, f"'{name}' doesn't exist!"
    def declare_and_assign_variable(self, name: str, value: Object) -> None:
        self.declare_variable(name, value.get_type())
        self.assign_variable(name, value)
    def exists(self, name: str) -> bool:
        # True if the name is bound in any scope, declared or not.
        for context in self.contexts[::-1]:
            if name in context: return True
        return False
    def access_variable(self, name: str) -> Object:
        # Innermost-first lookup; reading an Undeclared binding is an error.
        for context in self.contexts[::-1]:
            if name in context:
                value = context[name]
                assert not isinstance(value, Undeclared), f"{name} is not declared!"
                return value.value
        assert False, f"'{name}' is not defined!"
def is_truthy(object: Object) -> bool:
    """Coerce a runtime object to a Python bool; only Bool objects are supported."""
    if isinstance(object, Bool):
        return object.value
    assert False, ("Unimplemented", object)
def calculate_expression(expression: Expression, program: ProgramState) -> Object:
match expression:
case FunctionCall(function_, arguments_):
function = calculate_expression(function_, program)
assert isinstance(function, Function), (function_, function)
name, parameters, return_type, body, func = function.function
arguments = [calculate_expression(argument, program) for argument in arguments_]
assert len(arguments) == len(parameters), f"{name} expected {len(parameters)} arguments, but got {len(arguments)}!"
for (argument, (parameter_name, parameter)) in zip(arguments, parameters):
assert argument.get_type().is_subtype_of(parameter), f"For argument '{parameter_name}' of '{name}', expected value of type {parameter.represent()}, but got {argument.get_type().represent()}!"
return_value = func(name, parameters, return_type, body, *arguments)
assert isinstance(return_value, Object), return_value
assert return_value.get_type().is_subtype_of(return_type)
return return_value
case Variable(name):
return program.access_variable(name)
case String(string):
return Str(string)
case Number(number):
return Int(number)
case TupleExpr(elements_):
tuple_elements = [calculate_expression(element, program) for element in elements_]
return TupleObject(TupleType([element.get_type() for element in tuple_elements]), tuple(tuple_elements))
case Ternary(condition_, if_true, if_false):
return calculate_expression(if_true, program) if is_truthy(calculate_expression(condition_, program)) else calculate_expression(if_false, program)
case Or(lhs, rhs):
left_value = calculate_expression(lhs, program)
assert isinstance(left_value, Bool)
if left_value.value: return Bool(True)
right_value = calculate_expression(rhs, program)
assert isinstance(right_value, Bool)
return Bool(left_value.value or right_value.value)
case And(lhs, rhs):
left_value = calculate_expression(lhs, program)
assert isinstance(left_value, Bool)
if not left_value.value: return Bool(False)
right_value = calculate_expression(rhs, program)
assert isinstance(right_value, Bool)
return Bool(left_value.value and right_value.value)
case Bor(lhs, rhs):
assert False, ("Unimplemented", lhs, rhs)
case Bxor(lhs, rhs):
assert False, ("Unimplemented", lhs, rhs)
case Band(lhs, rhs):
assert False, ("Unimplemented", lhs, rhs)
case Equal(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
if left_value.get_type() != right_value.get_type(): return Bool(False)
return Bool(left_value == right_value)
case NotEqual(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
if left_value.get_type() != right_value.get_type(): return Bool(True)
return Bool(left_value != right_value)
case LessThan(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
assert isinstance(left_value, Int)
assert isinstance(right_value, Int)
return Bool(left_value.num < right_value.num)
case GreaterThan(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
assert isinstance(left_value, Int)
assert isinstance(right_value, Int)
return Bool(left_value.num > right_value.num)
case LessThanOrEqual(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
assert isinstance(left_value, Int)
assert isinstance(right_value, Int)
return Bool(left_value.num <= right_value.num)
case GreaterThanOrEqual(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
assert isinstance(left_value, Int)
assert isinstance(right_value, Int)
return Bool(left_value.num >= right_value.num)
case ShiftLeft(lhs, rhs):
assert False, ("Unimplemented", lhs, rhs)
case ShiftRight(lhs, rhs):
assert False, ("Unimplemented", lhs, rhs)
case Addition(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
if isinstance(left_value, Int):
assert isinstance(right_value, Int)
return Int(left_value.num + right_value.num)
elif isinstance(left_value, Str):
assert isinstance(right_value, Str)
return Str(left_value.str + right_value.str)
elif isinstance(left_value, ListObject):
assert isinstance(right_value, ListObject)
if left_value.type.type == VariableType(""): return right_value
if right_value.type.type == VariableType(""): return left_value
assert left_value.type == right_value.type, (left_value, right_value)
return ListObject(left_value.type, left_value.list + right_value.list)
else:
assert False, f"Expected two ints or two strs. Got {left_value.get_type().represent()} and {right_value.get_type().represent()}!"
case Subtract(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
assert isinstance(left_value, Int)
assert isinstance(right_value, Int)
return Int(left_value.num - right_value.num)
case Multiplication(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
assert isinstance(left_value, Int)
assert isinstance(right_value, Int)
return Int(left_value.num * right_value.num)
case Division(lhs, rhs):
assert False, ("Unimplemented", lhs, rhs)
case Modulo(lhs, rhs):
left_value = calculate_expression(lhs, program)
right_value = calculate_expression(rhs, program)
if isinstance(left_value, Int):
assert isinstance(right_value, Int), f"Expected int, got {right_value.get_type().represent()}!"
return Int(left_value.num % right_value.num)
elif isinstance(left_value, Str):
# TODO: Maybe actually just implement C-style string formatting? This code is a mess
match right_value:
case TupleObject(_, tuple_obj_elements):
assert left_value.str.count("%"+"s") == len(tuple_obj_elements), (left_value.str.count("%%s"), len(tuple_obj_elements))
the_elements: Tuple[str, ...] = ()
for element in tuple_obj_elements:
assert isinstance(element, Str)
the_elements += (element.str,)
return Str(left_value.str % the_elements)
case Str(string):
return Str(left_value.str % string)
case _:
assert False, f"Format string expected either a string or a tuple of strings, but got a '{right_value.get_type().represent()}'!\n{lhs, rhs, right_value}"
match right_value:
case Int(num): return Str(left_value.str % num)
case _: assert False, ("Unimplemented", right_value)
assert False, ("Unimplemented", lhs, rhs)
case Return(expression):
value = calculate_expression(expression, program)
return ReturnObject(ReturnType(value.get_type()), value)
case StructInstantiation(struct_, arguments_):
struct = calculate_expression(struct_, program)
assert isinstance(struct, TypeObject)
assert isinstance(struct.type, StructType)
struct_arguments = {name: calculate_expression(expression, program) for (name, expression) in arguments_}
for field in struct_arguments:
assert field in struct.type.members, f"The struct {struct.type.name} does not have the field '{field}'!"
assert struct_arguments[field].get_type().is_subtype_of(struct.type.members[field]), f"'{struct.type.name}.{field}' field expected value of type {struct.type.members[field].represent()}, but got a value of type {struct_arguments[field].get_type().represent()}!"
for field in struct.type.members:
assert field in struct_arguments, f"Missing field '{field}' of type {struct.type.represent()}."
return Struct(struct.type, struct_arguments)
case FieldAccess(expression_, field):
value = calculate_expression(expression_, program)
match value:
case TypeObject(type):
match type:
case EnumType(name, members, _):
assert field in members, f"{type.represent()} does not contain the member '{field}'!"
member = members[field]
if not member: return EnumValue(type, field, [])
def return_member(name: str, parameters: List_[Tuple[str, Type]], return_type: Type, _statement: Statement, *args: Object):
assert isinstance(return_type, EnumType)
assert len(args) == len(parameters)
for (arg, (_, parameter)) in zip(args, parameters):
assert arg.get_type().is_subtype_of(parameter)
return EnumValue(return_type, name, list(args))
return Function(FunctionType(member, type), (field, [('', member_type) for member_type in member], type, Statements([]), return_member))
case _: assert False, ("Unimplemented", type, field)
case Struct(type, fields):
assert field in fields, f"Struct '{type.represent()}' does not have the field '{field}'!"
return fields[field]
case _: assert False, ("Unimplemented", value, field)
case ArrayAccess(array_, index_):
array = calculate_expression(array_, program)
assert array.get_type().is_indexable(), f"Objects of type {array.get_type().represent()} cannot be indexed!"
if isinstance(array, Str):
index = calculate_expression(index_, program)
assert isinstance(index, Int), f"Index must be '{IntType.represent()}', got '{index.get_type().represent()}'!"
assert 0 <= index.num < len(array.str), f"Index out of bounds. Str of length {len(array.str)} accessed at index {index.num}. {array_, index_}"
return Str(array.str[index.num])
elif isinstance(array, TupleObject):
index = calculate_expression(index_, program)
assert isinstance(index, Int), f"Index must be '{IntType.represent()}', got '{index.get_type().represent()}'!"
assert 0 <= index.num <= len(array.tuple), f"Index out of bounds. Tuple of length {len(array.tuple)} accessed at index {index.num}. {array_, index_}"
return array.tuple[index.num]
elif isinstance(array, ListObject):
array_type = array.type.type
index = calculate_expression(index_, program)
assert isinstance(index, Int), f"Index must be '{IntType.represent()}', got '{index.get_type().represent()}'!"
assert 0 <= index.num < len(array.list), f"Index out of bounds. List of length {len(array.list)} accessed at index {index.num}"
element = array.list[index.num]
assert element.get_type().is_subtype_of(array_type)
return element
elif isinstance(array, DictionaryObject):
index = calculate_expression(index_, program)
assert index.get_type().is_subtype_of(array.type.key_type)
index_h = index.hash()
assert index_h in array.dict, f"{index} is not in {array}! {array_}, {index_}"
value = array.dict[index_h]
assert value.get_type().is_subtype_of(array.type.value_type)
return value
else:
assert False, "Unreachable"
case Bnot(expression_):
assert False, ("Unimplemented", expression_)
case Not(expression_):
value = calculate_expression(expression_, program)
assert isinstance(value, Bool)
return Bool(not value.value)
case UnaryPlus(expression_):
assert False, ("Unimplemented", expression_)
case UnaryMinus (expression_):
assert False, ("Unimplemented", expression_)
case Array(array_):
if len(array_) == 0:
return ListObject(ListType(VariableType("")), [])
elements_type: Optional[Type] = None
array_elements_: List_[Object] = []
for element_ in array_:
element = calculate_expression(element_, program)
if elements_type:
assert element.get_type().is_subtype_of(elements_type), (element, elements_type)
else:
elements_type = element.get_type()
array_elements_.append(element)
assert elements_type
return ListObject(ListType(elements_type), array_elements_)
case LoopComprehension(body_, variable, array_):
array = calculate_expression(array_, program)
assert array.get_type().is_indexable()
if isinstance(array, ListObject):
elements: List_[Object] = []
elements_type = None
for element in array.list:
program.push_context({variable: Declared.from_obj(element)})
elements.append(calculate_expression(body_, program))
program.pop_context()
if elements_type:
assert elements[-1].get_type().is_subtype_of(elements_type)
else:
elements_type = elements[-1].get_type()
if not elements: return ListObject(ListType(VariableType("")), [])
assert elements_type
return ListObject(ListType(elements_type), elements)
elif isinstance(array, Dictionary):
elements = []
elements_type = None
for element_h in array.dict:
element = element_h.get_object()
program.push_context({variable: Declared.from_obj(element)})
elements.append(calculate_expression(body_, program))
program.pop_context()
if elements_type:
assert elements[-1].get_type().is_subtype_of(elements_type)
else:
elements_type = elements[-1].get_type()
if not elements: return ListObject(ListType(VariableType("")), [])
assert elements_type
return ListObject(ListType(elements_type), elements)
else:
assert False, ("Unimplemented", array)
case DictionaryExpr(dict_):
dict: Dict[Hashable, Object] = {}
if not dict_:
return Dictionary(DictionaryType(VariableType(""), VariableType("")), {})
key_type, value_type = None, None
for (key_, value_) in dict_:
key = calculate_expression(key_, program)
value = calculate_expression(value_, program)
if key_type:
assert value_type, "Unreachable"
assert key.get_type().is_subtype_of(key_type)
assert value.get_type().is_subtype_of(value_type)
else:
assert not value_type
key_type = key.get_type()
value_type = value.get_type()
dict[key.hash()] = value
assert key_type and value_type
assert not (isinstance(key_type, VariableType) and key_type.name == '')
return Dictionary(DictionaryType(key_type, value_type), dict)
case DictComprehension(body_, variable, array_):
array = calculate_expression(array_, program)
assert array.get_type().is_indexable()
if isinstance(array, ListObject):
key_, value_ = body_
dict_entries: Dict[Hashable, Object] = {}
key_type = None
value_type = None
for element in array.list:
program.push_context({variable: Declared.from_obj(element)})
key = calculate_expression(key_, program)
key_h = key.hash()
dict_entries[key_h] = calculate_expression(value_, program)
program.pop_context()
if key_type:
assert value_type
assert key.get_type().is_subtype_of(key_type)
assert dict_entries[key_h].get_type().is_subtype_of(value_type)
else:
assert not value_type
key_type = key.get_type()
value_type = dict_entries[key_h].get_type()
if not dict_entries: return Dictionary(DictionaryType(VariableType(""), VariableType("")), {})
assert key_type and value_type
assert not (isinstance(key_type, VariableType) and key_type.name == '')
return Dictionary(DictionaryType(key_type, value_type), dict_entries)
case _:
assert False, ("Unimplemented", expression)
assert False
def calculate_type_expression(expression: TypeExpression, program: ProgramState, must_resolve:bool=True) -> Type:
    """Resolve a type expression from the AST into a concrete Type.

    When `must_resolve` is False, names not yet declared are returned as
    placeholder VariableTypes instead of erroring — used while defining
    enums/structs whose members may mention the type being defined.
    """
    match expression:
        case TypeName(name):
            # Unknown names become placeholders only when resolution is optional.
            if not program.exists(name) and not must_resolve: return VariableType(name)
            type_obj = program.access_variable(name)
            assert isinstance(type_obj, TypeObject)
            return type_obj.type
        case ListTypeExpr(type_):
            return ListType(calculate_type_expression(type_, program, must_resolve))
        case TupleTypeExpr(types_):
            return TupleType([calculate_type_expression(type, program, must_resolve) for type in types_])
        case UnionTypeExpr(types_):
            return UnionType([calculate_type_expression(type, program, must_resolve) for type in types_])
        case FunctionTypeExpr(arguments_, return_type_):
            return FunctionType([calculate_type_expression(argument, program, must_resolve) for argument in arguments_], calculate_type_expression(return_type_, program, must_resolve))
        case TypeSpecification(type_, types_):
            # Instantiate a generic type by substituting its type arguments.
            type = calculate_type_expression(type_, program, must_resolve)
            assert isinstance(type, GenericType)
            assert len(type.variables) == len(types_)
            types = [calculate_type_expression(type_, program, must_resolve) for type_ in types_]
            result_type = type.substitute(types)
            return result_type
        case _:
            assert False, ("Unimplemented", expression)
    assert False, "Unreachable"
def match_enum_expression(enum: Type, value: Object, expression: Expression) -> Optional[Dict[str, Object]]:
    """Try to match an enum `value` against a case pattern `expression`.

    Returns a dict of variable bindings captured by the pattern on success,
    or None when the pattern does not match. A bare variable whose name
    starts with '_' acts as a catch-all that binds the whole value.
    """
    assert isinstance(enum, EnumType)
    assert isinstance(value, EnumValue)
    match expression:
        case Variable(name):
            # '_'-prefixed names are wildcards capturing the entire value.
            if name.startswith('_'): return {name: value}
            assert name in enum.members, f"Enum '{enum.represent()}' does not contain the member '{name}'!"
            assert enum.members[name] == [], f"Enum member '{enum.represent()}.{name}' has {len(enum.members[name])} fields that have not been captured!"
            if name != value.name or value.values: return None
            return {}
        case FunctionCall(function, arguments):
            # Pattern of the form Member(a, b, ...): bind each payload element.
            assert isinstance(function, Variable)
            assert function.name in enum.members, f"Enum '{enum.represent()}' does not contain the member '{function.name}'!"
            if function.name != value.name: return None
            member = enum.members[function.name]
            assert isinstance(member, list) # TODO: Report calling a struct enum member with parentheses
            assert isinstance(value.values, list) # Same as above but like inverse
            assert len(arguments) == len(member), f"{value.get_type().represent()}.{value.name} expected {len(member)} args, but got {len(arguments)}!"
            assert len(member) == len(value.values)
            new_variables: Dict[str, Object] = {}
            for argument, element in zip(arguments, value.values):
                assert isinstance(argument, Variable) # TODO
                new_variables[argument.name] = element
            return new_variables
        case _:
            assert False, ("Unimplemented", expression)
    assert False, ("Unimplemented", value, expression)
def update_types(type: Type, program: ProgramState):
    """Patch previously-declared type objects after `type` is defined.

    When a new enum/struct is declared, earlier types may contain placeholder
    references to its name; `fill` substitutes the now-known type into them,
    enabling (mutually) recursive type definitions.
    """
    assert isinstance(type, EnumType) or isinstance(type, StructType)
    for context in program.contexts:
        for variable_ in context:
            variable = context[variable_]
            match variable:
                case Declared(type_, value):
                    # Only TypeObjects (declared types) need patching.
                    if isinstance(variable.value, TypeObject):
                        assert type_ == TypeType
                        assert isinstance(value, TypeObject)
                        value.type.fill({type.name: type}, [])
                case Undeclared(type_): pass
                case _: assert False, ("Unimplemented", variable)
@dataclass
class ReturnResult:
    # A `return` propagating out of a statement list, carrying its value.
    value: Object
@dataclass
class ContinueResult:
    # A `continue` propagating out to the nearest enclosing loop.
    pass
@dataclass
class BreakResult:
    # A `break` propagating out to the nearest enclosing loop.
    pass
@dataclass
class NothingResult:
    # The statement list ran to completion with no control-flow escape.
    pass
# The control-flow outcome of interpreting a list of statements.
StatementsResult = Union[ReturnResult, ContinueResult, BreakResult, NothingResult]
def interpret_statements(statements: List_[Statement], program: ProgramState) -> StatementsResult:
for statement in statements:
match statement:
case ExpressionStatement(expression):
value = calculate_expression(expression, program)
if isinstance(value, ReturnObject): return ReturnResult(value.value)
case Assignment(lhs, rhs, type_):
assert is_valid_target(lhs)
match lhs:
case Variable(name):
value = calculate_expression(rhs, program)
if type_:
type = calculate_type_expression(type_, program)
program.declare_variable(name, type)
program.assign_variable(name, value)
case FieldAccess(expression_, field):
expr = calculate_expression(expression_, program)
assert isinstance(expr, Struct)
struct_type = expr.get_type()
assert isinstance(struct_type, StructType)
assert field in struct_type.members, f"Struct '{struct_type.represent()}' does not contain the field '{field}'!"
value = calculate_expression(rhs, program)
assert value.get_type().is_subtype_of(struct_type.members[field])
expr.fields[field] = value
case ArrayAccess(array_, index_):
array = calculate_expression(array_, program)
index = calculate_expression(index_, program)
value = calculate_expression(rhs, program)
assert array.get_type().is_indexable(), array
match array:
case Dictionary(dict_type, dict_):
try:
index_h = index.hash()
except AssertionError:
assert False, (array_, index_, index, dict_)
if isinstance(dict_type.key_type, VariableType) and dict_type.key_type.name == "":
dict_type.key_type, dict_type.value_type = index.get_type(), value.get_type()
assert index.get_type().is_subtype_of(dict_type.key_type), (index, dict_type.key_type)
assert value.get_type().is_subtype_of(dict_type.value_type), (value, dict_type.value_type)
dict_[index_h] = value
case _: assert False, ("Unimplemented", array)
case _:
assert False, ("Unimplemented", lhs)
case IfStatement(condition, body, else_body):
if is_truthy(calculate_expression(condition, program)):
program.push_context({})
return_value = interpret_statements([body], program)
program.pop_context()
if not isinstance(return_value, NothingResult): return return_value
elif else_body:
program.push_context({})
return_value = interpret_statements([else_body], program)
program.pop_context()
if not isinstance(return_value, NothingResult): return return_value
case Statements(statements):
# TODO: Proper context and scoping
program.push_context({})
return_value = interpret_statements(statements, program)
program.pop_context()
if not isinstance(return_value, NothingResult): return return_value
case FunctionDefinition(name, arguments_, return_type_, body):
def run_function(name: str, arguments: List_[Tuple[str, Type]], return_type: Type, body: Statement, *args: Object) -> Object:
assert len(args) == len(arguments), f"'{name}' expected {len(arguments)} arguments, but got {len(args)} instead!"
new_program = ProgramState(program.modules, program.contexts[:2])
new_program.push_context({})
for (argument, (argument_name, argument_type)) in zip(args, arguments):
assert argument.get_type().is_subtype_of(argument_type), f"'{name}' expected argument '{argument_name}' to have a value of type {argument_type.represent()}, but got {argument.get_type().represent()} instead!"
new_program.declare_variable(argument_name, argument_type)
new_program.assign_variable(argument_name, argument)
return_value = interpret_statements([body], new_program)
new_program.pop_context()
assert len(new_program.contexts) == 2
match return_value:
case ReturnResult(value):
assert value.get_type().is_subtype_of(return_type), f"'{name}' expected a return value of type {return_type.represent()}, but got {value.get_type().represent()}!"
return value
case NothingResult():
assert return_type.is_subtype_of(VoidType), f"'{name}' expected a return type of {return_type.represent()} but got nothing!"
return Void
case _: assert False, ("Unimplemented", return_value)
arguments = [(argument.name, calculate_type_expression(argument.type, program)) for argument in arguments_]
return_type = calculate_type_expression(return_type_, program) if return_type_ else VoidType
function_type = FunctionType([argument[1] for argument in arguments], return_type)
object = Function(function_type, (name, arguments, return_type, body, run_function))
program.declare_and_assign_variable(name, object)
case EnumDefinition(name, entries):
enum_type = EnumType(name, {entry.name: [calculate_type_expression(type, program, False) for type in entry.types] for entry in entries}, [])
program.declare_and_assign_variable(name, TypeObject(enum_type))
update_types(enum_type, program)
case StructDefinition(name, entries):
struct_type = StructType(name, {entry.name: calculate_type_expression(entry.type, program, False) for entry in entries}, [])
program.declare_and_assign_variable(name, TypeObject(struct_type))
update_types(struct_type, program)
case MatchStatement(value_, cases):
value = calculate_expression(value_, program)
assert isinstance(value, EnumValue), f"Cannot only match over enums, got {value.get_type().represent()} instead!"
assert isinstance(value.type, EnumType) # TODO: Pattern match things besides enums
for case in cases:
if (new_variables := match_enum_expression(value.type, value, case[0])) is not None:
program.push_context({name: Declared.from_obj(new_variables[name]) for name in new_variables})
return_value = interpret_statements([case[1]], program)
program.pop_context()
if not isinstance(return_value, NothingResult): return return_value
break
case DoWhileStatement(body, condition_):
assert condition_ is None # TODO
program.push_context({})
return_value = interpret_statements([body], program)
program.pop_context()
if not isinstance(return_value, NothingResult): return return_value
case WhileStatement(condition_, body):
while is_truthy(calculate_expression(condition_, program)):
program.push_context({})
return_value = interpret_statements([body], program)
program.pop_context()
match return_value:
case NothingResult(): pass
case ContinueResult(): continue
case BreakResult(): break
case ReturnResult(_): return return_value
case _: assert False, ("Unimplemented", return_value)
if not isinstance(return_value, NothingResult): return return_value
case AssertStatement(condition_, message_):
if not is_truthy(calculate_expression(condition_, program)):
if message_:
message = calculate_expression(message_, program)
assert isinstance(message, Str)
assert False, message.str
assert False, "Assertion failed"
case TypeDeclarationStatement(declaration):
program.declare_variable(declaration.name, calculate_type_expression(declaration.type, program))
case ForLoop(variable, array_, body):
array = calculate_expression(array_, program)
if isinstance(array, ListObject):
for value in array.list:
assert isinstance(value, Object)
assert value.get_type().is_subtype_of(array.type.type)
program.push_context({variable: Declared.from_obj(value)})
return_value = interpret_statements([body], program)
program.pop_context()
match return_value:
case NothingResult(): pass
case ReturnResult(_): return return_value
case _: assert False, ("Unimplemented", return_value)
elif isinstance(array, Dictionary):
for value_h in array.dict:
value = value_h.get_object()
assert value.get_type().is_subtype_of(array.type.key_type)
program.push_context({variable: Declared.from_obj(value)})
return_value = interpret_statements([body], program)
program.pop_context()
match return_value:
case NothingResult(): pass
case ReturnResult(_): return return_value
case _: assert False, ("Unimplemented", return_value)
case ContinueStatement(): return ContinueResult()
case BreakStatement(): return BreakResult()
case Import(file_):
# TODO: Maybe an inclusion system within a preprocessor maybe
file = calculate_expression(file_, program)
assert isinstance(file, Str), "Only strings are valid file paths!"
module = interpret_file(file.str, program.modules) if file.str not in program.modules else program.modules[file.str]
program.contexts[0] |= module
if file.str not in program.modules:
program.modules[file.str] = module
case TypeDefinition(name, expression_):
program.declare_and_assign_variable(name, TypeObject(calculate_type_expression(expression_, program)))
case _:
assert False, ("Unimplemented", statement)
return NothingResult()
def interpret_file(file_path: str, modules: Dict[str, Module]) -> Module:
# print(f"\tParsing {file_path}")
lexer = Lexer.from_file(file_path)
statements: List_[Statement] = []
while not lexer.check_token(EofToken()): statements.append(parse_statement(lexer))
# print(f"\tInterpreting {file_path}")
program = ProgramState(modules, [{variable: Declared.from_obj(variables[variable]) for variable in variables}, {}])
return_value = interpret_statements(statements, program)
# print(f"Finished {file_path}")
assert len(program.contexts) == 2
match return_value:
case NothingResult(): pass
case ReturnObject(_): assert False, "Cannot return from outside a function!"
case ContinueResult(): assert False, "Cannot continue from outside a loop!"
case BreakResult(): assert False, "Cannot break from outside a loop!"
case _: assert False, ("Unimplemented", return_value)
return program.contexts[1]

155
ppp_lexer.ppp Normal file
View File

@ -0,0 +1,155 @@
import "ppp_tokens.ppp";
struct Lexer {
    source: str,
    peeked_token: OptionalToken,
    location: int,
    line: int,
    col: int
}
func new_lexer(source: str) -> Lexer {
    return Lexer{
        peeked_token = OptionalToken.None,
        source = source,
        location = 0,
        col = 0,
        line = 1
    };
}
func lexer_from_file(path: str) -> Lexer return new_lexer(read(path));
func is_space(char: str) -> bool {
    if char == " " return true;
    if char == "\t" return true;
    if char == "\n" return true;
    return false;
}
func is_digit(char: str) -> bool {
    digits: str = "0123456789";
    i: int = 0;
    while i < len(digits) do {
        if digits[i] == char return true;
        i = i + 1;
    }
    return false;
}
func is_alpha(char: str) -> bool {
    alphabet: str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
    i: int = 0;
    while i < len(alphabet) do {
        if alphabet[i] == char return true;
        i = i + 1;
    }
    return false;
}
func lexer_next_token(lexer: Lexer) -> Token {
    match lexer.peeked_token in {
        case Some(token) do {
            lexer.peeked_token = OptionalToken.None;
            return token;
        }
    }
    while lexer.location < len(lexer.source) && is_space(lexer.source[lexer.location]) do {
        if lexer.source[lexer.location] == "\n" do {
            lexer.line = lexer.line + 1;
            lexer.col = 0;
        }
        lexer.location = lexer.location + 1;
    }
    if lexer.location >= len(lexer.source) return Token{line=lexer.line, col=lexer.col, value="\0", contents=TokenContents.Eof};
    if is_digit(lexer.source[lexer.location]) do {
        number_str: str = "";
        while lexer.location < len(lexer.source) && is_digit(lexer.source[lexer.location]) do {
            number_str = number_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        number: int = str_to_int(number_str);
        return Token{line=lexer.line, col=lexer.col, value=number_str, contents=TokenContents.Number(number)};
    } else if is_alpha(lexer.source[lexer.location]) do {
        word_str: str = "";
        while lexer.location < len(lexer.source) && is_alpha(lexer.source[lexer.location]) do {
            word_str = word_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        match keyword_from_str(word_str) in {
            case Some(keyword) return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Keyword(keyword)};
            case None return Token{line=lexer.line, col=lexer.col, value=word_str, contents=TokenContents.Identifier(word_str)};
        }
        assert false, "Identifier";
    } else if lexer.source[lexer.location] == "\"" do {
        lexer.location = lexer.location + 1;
        string_str: str = "";
        escaping: bool = false;
        while lexer.location < len(lexer.source) && (lexer.source[lexer.location] != "\"" || escaping) do {
            escaping = escaping? false: lexer.source[lexer.location] == "\\";
            string_str = string_str + lexer.source[lexer.location];
            lexer.location = lexer.location + 1;
        }
        lexer.location = lexer.location + 1;
        return Token{line=lexer.line, col=lexer.col, value="\""+string_str+"\"", contents=TokenContents.String(string_str)};
    } else if lexer.source[lexer.location] == "|" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "|" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="||", contents=TokenContents.Symbol(Symbol.Dpipe)};
    } else if lexer.source[lexer.location] == "&" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "&" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="&&", contents=TokenContents.Symbol(Symbol.Dampersand)};
    } else if lexer.source[lexer.location] == "*" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "*" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="**", contents=TokenContents.Symbol(Symbol.Dasterisk)};
    } else if lexer.source[lexer.location] == "-" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="->", contents=TokenContents.Symbol(Symbol.Arrow)};
    } else if lexer.source[lexer.location] == ">" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value=">=", contents=TokenContents.Symbol(Symbol.GreaterEqual)};
    } else if lexer.source[lexer.location] == "<" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="<=", contents=TokenContents.Symbol(Symbol.LesserEqual)};
    } else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="==", contents=TokenContents.Symbol(Symbol.Dequal)};
    } else if lexer.source[lexer.location] == "!" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
        lexer.location = lexer.location + 2;
        return Token{line=lexer.line, col=lexer.col, value="!=", contents=TokenContents.Symbol(Symbol.NotEqual)};
    } else {
        match symbol_from_str(lexer.source[lexer.location]) in {
            case Some(symbol) do {
                lexer.location = lexer.location + 1;
                return Token{line=lexer.line, col=lexer.col, value=lexer.source[lexer.location-1], contents=TokenContents.Symbol(symbol)};
            }
            case None assert false, "Unimplemented, '%s'" % lexer.source[lexer.location];
        }
    }
}
func lexer_peek_token(lexer: Lexer) -> Token {
    match lexer.peeked_token in {
        case Some(cached) return cached;
        case None do {
            upcoming: Token = lexer_next_token(lexer);
            lexer.peeked_token = OptionalToken.Some(upcoming);
            return upcoming;
        }
    }
}
func lexer_check_token(lexer: Lexer, expected: TokenContents) -> bool {
    return lexer_peek_token(lexer).contents == expected;
}
func lexer_take_token(lexer: Lexer, token: TokenContents) -> OptionalToken {
    if !lexer_check_token(lexer, token) return OptionalToken.None;
    return OptionalToken.Some(lexer_next_token(lexer));
}
func lexer_take_tokens(lexer: Lexer, tokens: TokenContents[]) -> OptionalToken {
    for candidate in tokens do {
        if lexer_check_token(lexer, candidate) return OptionalToken.Some(lexer_next_token(lexer));
    }
    return OptionalToken.None;
}
func lexer_assert_token(lexer: Lexer, expected: TokenContents) -> Token {
    actual: Token = lexer_next_token(lexer);
    assert actual.contents == expected, "Expected %s but got %s!" % (token_contents_to_str(expected), token_to_str(actual));
    return actual;
}
func lexer_check_tokens(lexer: Lexer, tokens: TokenContents[]) -> bool {
    for candidate in tokens do {
        if lexer_check_token(lexer, candidate) return true;
    }
    return false;
}

147
ppp_lexer.py Normal file
View File

@ -0,0 +1,147 @@
from typing import Optional
from ppp_tokens import EofToken, IdentifierToken, Keyword, KeywordToken, NumberToken, StringToken, Symbol, SymbolToken, Token, TokenContents
class Lexer:
def __init__(self, source: str) -> None:
self._source = source
self._location = 0
self._line = 1
self._col = 0
self._peeked_token: Optional[Token] = None
self._current: str = ""
@classmethod
def from_file(cls, path: str) -> 'Lexer':
with open(path) as f:
return cls(f.read())
def _advance(self) -> str:
assert self._location < len(self._source)
self._line, self._col = (self._line + 1, 0) if self._current == '\n' else (self._line, self._col + 1)
self._location += 1
self._current = self._source[self._location] if self._location < len(self._source) else ''
return self._current
# def _peek(self) -> str:
# assert self._location < len(self._source)-1
def next_token(self) -> Token:
if self._peeked_token is not None:
peeked_token, self._peeked_token = self._peeked_token, None
return peeked_token
while self._location < len(self._source) and self._source[self._location] in ' \t\n': self._advance()
if self._location >= len(self._source): return Token(self._line, self._col, '\0', EofToken())
match self._source[self._location]:
case c if c.isdigit():
start_location = self._location
while self._location < len(self._source) and self._source[self._location].isdigit(): self._location += 1
number = int(self._source[start_location:self._location])
return Token(self._line, self._col, self._source[start_location:self._location], NumberToken(number))
case c if c.isalpha() or c == "_":
start_location = self._location
while self._location < len(self._source) and (self._source[self._location].isalpha() or self._source[self._location] in '_'): self._location += 1
word = self._source[start_location:self._location]
try:
keyword = Keyword(word)
return Token(self._line, self._col, word, KeywordToken(keyword))
except ValueError:
try:
symbol = Symbol(word)
return Token(self._line, self._col, word, SymbolToken(symbol))
except ValueError:
return Token(self._line, self._col, word, IdentifierToken(word))
case '"':
# TODO: Escaping
self._location += 1
start_location = self._location
escaping = False
while self._location < len(self._source) and (self._source[self._location] != '"' or escaping):
escaping = self._source[self._location] == '\\' if not escaping else False
self._location += 1
string = self._source[start_location:self._location].encode('utf-8').decode('unicode_escape')
self._location += 1
return Token(self._line, self._col, self._source[start_location-1:self._location], StringToken(string))
# TODO: Make a proper Trie for this.
case '|' if self._location < len(self._source)-1 and self._source[self._location+1] == '|':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dpipe))
case '&' if self._location < len(self._source)-1 and self._source[self._location+1] == '&':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dampersand))
case '*' if self._location < len(self._source)-1 and self._source[self._location+1] == '*':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dasterisk))
case '-' if self._location < len(self._source)-1 and self._source[self._location+1] == '>':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Arrow))
case '>' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.GreaterEqual))
case '<' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.LesserEqual))
case '=' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dequal))
case '=' if self._location < len(self._source)-1 and self._source[self._location+1] == '>':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.EqualArrow))
case '!' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
self._location += 2
return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.NotEqual))
case c if c in Symbol._value2member_map_:
self._location += 1
return Token(self._line, self._col, self._source[self._location-1], SymbolToken(Symbol(c)))
case _:
assert False, ("Unimplemented", c, self._location)
assert False, "Unreachable"
def peek_token(self) -> Token:
if self._peeked_token is not None: return self._peeked_token
self._peeked_token = self.next_token()
return self._peeked_token
def assert_tokenkind(self, kind: type) -> Token:
token = self.next_token()
assert isinstance(token.contents, kind), (f"Expected {kind} but got {token.contents}!", self.next_token(), self.next_token(), self.next_token())
return token
def assert_token(self, expected: TokenContents) -> Token:
token = self.next_token()
assert token.contents == expected, (f"Expected {expected} but got {token.contents}!", self.next_token(), self.next_token())
return token
def check_token(self, expected: TokenContents) -> bool:
token = self.peek_token()
return token.contents == expected
def check_tokens(self, *expected: TokenContents) -> bool:
for token in expected:
if self.check_token(token):
return True
return False
def check_tokenkind(self, kind: type) -> bool:
token = self.peek_token()
return isinstance(token.contents, kind)
def take_tokenkind(self, kind: type) -> Optional[Token]:
if self.check_tokenkind(kind):
return self.next_token()
return None
def take_token(self, token: TokenContents) -> Optional[Token]:
if self.check_token(token):
return self.next_token()
return None
def take_tokens(self, *tokens: TokenContents) -> Optional[Token]:
for token in tokens:
if self.check_token(token):
return self.next_token()
return None

29
ppp_object.ppp Normal file
View File

@ -0,0 +1,29 @@
import "ppp_types.ppp";
import "ppp_ast.ppp";
enum Object {
    Int(int), Str(str), Bool(bool), Void,
    Type(Type),
    Tuple(Type, Object[]), List(Type, Object[]), Array(Type, int, Object[]),
    Function(Type, (str, (str, Type)[], Type, Statement, (str, (str, Type)[], Type, Statement, Object[]) -> Object)),
    Return(Type, Object),
    EnumValue(Type, str, (dict[str, Object] | Object[])),
    EnumStruct(Type, dict[str, Object]),
    Struct(Type, dict[str, Object])
}
func object_get_type(obj: Object) -> Type {
    match obj in {
        case Int(_) return Type.Int;
        case Str(_) return Type.Str;
        case Type(_) return Type.Type;
        case Function(t, _) return t;
        case EnumValue(t, _, _) return t;
        case _ assert false, "Unimplemented object_get_type %s" % obj;
    }
}

141
ppp_object.py Normal file
View File

@ -0,0 +1,141 @@
# This file exists because I wanted to keep ppp_stdlib.py and ppp_interpreter.py seperate but they both rely on this one class.
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Callable, Dict, List as List_, Tuple as Tuple_, Union as Union_
from ppp_ast import Statement
from ppp_types import ArrayType, DictionaryType, EnumType, FunctionType, ListType, ReturnType, StructType, TupleType, Type, Int as IntType, Str as StrType, Bool as BoolType, Void as VoidType, TypeType
class Object(ABC):
    """Base class for every runtime value in the interpreter."""
    @abstractmethod
    def get_type(self) -> Type: ...
    def hash(self) -> 'Hashable':
        # Overridden only by value kinds usable as dictionary keys (Str, EnumValue).
        assert False, f"{self.get_type().represent()} cannot be hashed."
@dataclass
class Int(Object):
    """Runtime integer value."""
    num: int
    def get_type(self) -> Type: return IntType
@dataclass
class Str(Object):
    """Runtime string value; hashable so it can key a Dictionary."""
    str: str
    def get_type(self) -> Type: return StrType
    def hash(self) -> 'HStr':
        return HStr(self.str)
@dataclass
class Bool(Object):
    """Runtime boolean value."""
    value: bool
    def get_type(self) -> Type: return BoolType
@dataclass
class Void_(Object):
    """Runtime void/unit value; use the shared `Void` singleton below."""
    def get_type(self) -> Type: return VoidType
Void = Void_()  # shared singleton instance
@dataclass
class TypeObject(Object):
    """A first-class type value (a Type reified as a runtime Object)."""
    type: Type
    def get_type(self) -> Type: return TypeType
@dataclass
class Tuple(Object):
    """Runtime tuple value (fixed arity, heterogeneous element types)."""
    type: TupleType
    tuple: Tuple_[Object, ...]
    def get_type(self) -> Type: return self.type
@dataclass
class List(Object):
    """Runtime list value (variable length, homogeneous element type)."""
    type: ListType
    list: List_[Object]
    def get_type(self) -> Type: return self.type
@dataclass
class Array(Object):
    """Runtime array value (length is part of ArrayType)."""
    type: ArrayType
    array: List_[Object]
    def get_type(self) -> Type: return self.type
@dataclass
class Function(Object):
    """Runtime function value.

    `function` bundles (name, parameters as (name, type) pairs, return type,
    body statement, callable implementation) -- mirrors the ppp-side
    Object.Function variant.
    """
    type: FunctionType
    function: Tuple_[str, List_[Tuple_[str, Type]], Type, Statement, Callable[..., Object]]
    def get_type(self) -> Type: return self.type
@dataclass
class Return(Object):
    """Wrapper marking a value being returned (propagated out of a body)."""
    type: ReturnType
    value: Object
    def get_type(self) -> Type: return self.type
@dataclass
class EnumValue(Object):
    """A value of an enum variant, e.g. `Option.Some(x)`; hashable."""
    type: EnumType
    name: str              # variant name within the enum
    values: List_[Object]  # payload values carried by the variant
    def get_type(self) -> Type: return self.type
    def hash(self) -> 'HEnumValue':
        return HEnumValue(self.type, self.name, [value.hash() for value in self.values])
@dataclass
class Struct(Object):
    """Runtime struct instance: field name -> field value."""
    type: StructType
    fields: Dict[str, Object]
    def get_type(self) -> Type: return self.type
@dataclass
class Dictionary(Object):
    """Runtime dictionary; keys are Hashable mirrors of Objects."""
    type: DictionaryType
    dict: 'Dict[Hashable, Object]'
    def get_type(self) -> Type: return self.type
class Hashable(ABC):
    """An immutable, hashable mirror of an Object, usable as a dict key."""
    @abstractmethod
    def __hash__(self) -> int: ...
    @abstractmethod
    def get_object(self) -> Object: ...
@dataclass
class HInt(Hashable):
    """Hashable mirror of Int.

    Bug fix: this class previously implemented neither of Hashable's
    abstract methods (`__hash__`, `get_object`), so it could not be
    instantiated -- and @dataclass's generated __eq__ would have set
    __hash__ to None anyway. Both are now implemented, following the
    ('object', <kind>, ...) hash-tag convention used by HStr/HEnumValue.
    """
    num: int
    def __hash__(self) -> int:
        return hash(('object', 'int', self.num))
    def get_object(self) -> Object:
        return Int(self.num)
@dataclass
class HStr(Hashable):
    """Hashable mirror of Str."""
    str: str
    def __hash__(self) -> int:
        return hash(('object', 'str', self.str))
    def get_object(self) -> Object:
        return Str(self.str)
@dataclass
class HEnumValue(Hashable):
    """Hashable mirror of EnumValue (payloads mirrored recursively)."""
    type: EnumType
    name: str                 # variant name
    values: List_[Hashable]   # hashable mirrors of the variant's payload
    def __hash__(self) -> int:
        return hash(('object', 'enum', self.type, self.name, tuple(self.values)))
    def get_object(self) -> Object:
        return EnumValue(self.type, self.name, [value.get_object() for value in self.values])

425
ppp_parser.ppp Normal file
View File

@ -0,0 +1,425 @@
import "ppp_tokens.ppp";
import "ppp_lexer.ppp";
import "ppp_ast.ppp";
func parse_type_union(lexer: Lexer) -> TypeExpression {
    members: TypeExpression[] = [parse_type(lexer)];
    while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Pipe))) members = members + [parse_type(lexer)];
    if len(members) > 1 return TypeExpression.Union(members);
    return members[0];
}
func parse_type_primary(lexer: Lexer) -> TypeExpression {
    result: TypeExpression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) return TypeExpression.Tuple([]);
        grouped: TypeExpression[] = [parse_type_union(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) grouped = grouped + [parse_type_union(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        if len(grouped) == 1 do {
            match grouped[0] in {
                case Union(_) result = grouped[0];
                case _ result = TypeExpression.Tuple(grouped);
            }
        } else result = TypeExpression.Tuple(grouped);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        assert false, "Unimplemented parse_type_primary array";
    } else {
        result = TypeExpression.Name(parse_identifier(lexer));
    }
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Left)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(sym) do {
                match sym in {
                    case OpenSquare match lexer_peek_token(lexer).contents in {
                        case Number(length) do {
                            lexer_next_token(lexer);
                            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                            result = TypeExpression.Array(result, length);
                            continue;
                        }
                    }
                }
                closer: Symbol;
                match sym in {
                    case OpenSquare closer = Symbol.CloseSquare;
                    case Left closer = Symbol.Right;
                    case _ assert false, "Unreachable";
                }
                match sym in {
                    case OpenSquare if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(closer))) do {
                        result = TypeExpression.List(result);
                        continue;
                    }
                }
                generics: TypeExpression[] = [parse_type(lexer)];
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) generics = generics + [parse_type(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(closer));
                match result in {
                    case Specification(_, _) assert false, "Cannot specify an already specified type";
                }
                result = TypeExpression.Specification(result, generics);
            }
            case _ assert false, "Unreachable";
        }
    }
    return result;
}
func parse_type(lexer: Lexer) -> TypeExpression {
    left: TypeExpression = parse_type_primary(lexer);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) return left;
    result_type: TypeExpression = parse_type(lexer);
    match left in {
        case Tuple(parameter_types) return TypeExpression.Function(parameter_types, result_type);
    }
    return TypeExpression.Function([left], result_type);
}
func parse_type_declaration(lexer: Lexer) -> TypeDeclaration {
    declared_name: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    declared_type: TypeExpression = parse_type(lexer);
    return TypeDeclaration{name=declared_name, type_=declared_type};
}
func parse_enum_entry(lexer: Lexer) -> EnumEntry {
    variant_name: str = parse_identifier(lexer);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) return EnumEntry{name=variant_name, types=[]};
    payload: TypeExpression[] = [parse_type(lexer)];
    while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) payload = payload + [parse_type(lexer)];
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
    return EnumEntry{name=variant_name, types=payload};
}
func parse_struct_argument(lexer: Lexer) -> (str, Expression) {
    field_name: str = parse_identifier(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
    return (field_name, parse_expression(lexer));
}
func parse_dict_entry(lexer: Lexer) -> (Expression, Expression) {
    key_expr: Expression = parse_expression(lexer);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    return (key_expr, parse_expression(lexer));
}
func parse_primary(lexer: Lexer) -> Expression {
    expr: Expression;
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Open))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) expr = Expression.Tuple([]);
        else {
            items: Expression[] = [parse_expression(lexer)];
            had_trailing_comma: bool = false;
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) do {
                if lexer_check_token(lexer, TokenContents.Symbol(Symbol.Close)) do {
                    had_trailing_comma = true;
                    break;
                }
                items = items + [parse_expression(lexer)];
            }
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
            expr = had_trailing_comma || len(items) > 1? Expression.Tuple(items): items[0];
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenSquare))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseSquare))) expr = Expression.Array([]);
        else {
            items: Expression[] = [parse_expression(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                binder: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                iterable: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                expr = Expression.LoopComrehension(items[0], binder, iterable);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) items = items + [parse_expression(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                expr = Expression.Array(items);
            }
        }
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) expr = Expression.Dictionary([]);
        else {
            pairs: (Expression, Expression)[] = [parse_dict_entry(lexer)];
            if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
                binder: str = parse_identifier(lexer);
                lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
                iterable: Expression = parse_expression(lexer);
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                expr = Expression.DictComprehension(pairs[0], binder, iterable);
            } else {
                while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) pairs = pairs + [parse_dict_entry(lexer)];
                lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                expr = Expression.Dictionary(pairs);
            }
        }
    } else {
        tok: Token = lexer_next_token(lexer);
        match tok.contents in {
            case String(text) expr = Expression.String(text);
            case Number(value) expr = Expression.Number(value);
            case Identifier(name_) expr = Expression.Variable(name_);
            case _ assert false, "Expected identifier, but got %s!" % token_to_str(tok);
        }
    }
    while lexer_check_tokens(lexer, [TokenContents.Symbol(Symbol.Open), TokenContents.Symbol(Symbol.OpenSquare), TokenContents.Symbol(Symbol.Dot), TokenContents.Symbol(Symbol.OpenCurly)]) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(sym) match sym in {
                case Dot expr = Expression.FieldAccess(expr, parse_identifier(lexer));
                case Open do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) expr = Expression.FunctionCall(expr, []);
                    else {
                        args: Expression[] = [parse_expression(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) args = args + [parse_expression(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
                        expr = Expression.FunctionCall(expr, args);
                    }
                }
                case OpenSquare do {
                    subscript: Expression = parse_expression(lexer);
                    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseSquare));
                    expr = Expression.ArrayAccess(expr, subscript);
                }
                case OpenCurly do {
                    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) expr = Expression.StructInstantiation(expr, []);
                    else {
                        fields: (str, Expression)[] = [parse_struct_argument(lexer)];
                        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) fields = fields + [parse_struct_argument(lexer)];
                        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
                        expr = Expression.StructInstantiation(expr, fields);
                    }
                }
                case _ assert false, "Unimplemented parse_primary symbol %s" % symbol_to_str(sym);
            }
            case _ assert false, "Unimplemented parse_primary %s" % token_to_str(lexer_next_token(lexer));
        }
    }
    return expr;
}
func parse_unary(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Plus))) return Expression.UnaryPlus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Dash))) return Expression.UnaryMinus(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Exclamation))) return Expression.Not(parse_unary(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Tilde))) return Expression.Bnot(parse_unary(lexer));
    return parse_primary(lexer);
}
precedences: dict[Symbol, (Expression, Expression) -> Expression][] = [
    {Symbol.Dpipe: Expression.Or},
    {Symbol.Dampersand: Expression.And},
    {Symbol.Pipe: Expression.Bor},
    {Symbol.Carot: Expression.Bxor},
    {Symbol.Ampersand: Expression.Band},
    {
        Symbol.Dequal: Expression.Equal,
        Symbol.NotEqual: Expression.NotEqual
    },
    {
        Symbol.Left: Expression.LessThan,
        Symbol.Right: Expression.GreaterThan,
        Symbol.LesserEqual: Expression.LessThanOrEqual,
        Symbol.GreaterEqual: Expression.GreaterThanOrEqual
    },
    {
        Symbol.Dleft: Expression.ShiftLeft,
        Symbol.Dright: Expression.ShiftRight
    },
    {
        Symbol.Plus: Expression.Addition,
        Symbol.Dash: Expression.Subtract
    },
    {
        Symbol.Asterisk: Expression.Multiplication,
        Symbol.Slash: Expression.Division,
        Symbol.Percent: Expression.Modulo
    }
];
func parse_expression_at_level(lexer: Lexer, level: int) -> Expression {
    if level >= len(precedences) return parse_unary(lexer);
    lhs: Expression = parse_expression_at_level(lexer, level+1);
    op_tokens: TokenContents[] = [TokenContents.Symbol(sym) for sym in precedences[level]];
    while lexer_check_tokens(lexer, op_tokens) do {
        match lexer_next_token(lexer).contents in {
            case Symbol(op) do {
                combine: (Expression, Expression) -> Expression = precedences[level][op];
                lhs = combine(lhs, parse_expression_at_level(lexer, level+1));
            }
            case _ assert false, "Unreachable";
        }
    }
    return lhs;
}
func parse_ternary(lexer: Lexer) -> Expression {
    condition: Expression = parse_expression_at_level(lexer, 0);
    if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.QuestionMark))) return condition;
    when_true: Expression = parse_expression_at_level(lexer, 0);
    lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Colon));
    when_false: Expression = parse_ternary(lexer);
    return Expression.Ternary(condition, when_true, when_false);
}
func parse_expression(lexer: Lexer) -> Expression {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Return))) return Expression.Return(parse_expression(lexer));
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Lambda))) do {
        params: TypeDeclaration[];
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.EqualArrow))) params = [];
        else do {
            params = [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) params = params + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.EqualArrow));
        }
        return Expression.Lambda(params, parse_expression(lexer));
    }
    return parse_ternary(lexer);
}
func parse_identifier(lexer: Lexer) -> str {
    tok: Token = lexer_next_token(lexer);
    match tok.contents in {
        case Identifier(name_) return name_;
        case _ assert false, "Expected identifier, but got %s!" % token_to_str(tok);
    }
}
func parse_number(lexer: Lexer) -> int {
    tok: Token = lexer_next_token(lexer);
    match tok.contents in {
        case Number(value) return value;
        case _ assert false, "Expected number!";
    }
}
func parse_string(lexer: Lexer) -> str {
    tok: Token = lexer_next_token(lexer);
    match tok.contents in {
        case String(text) return text;
        case _ assert false, "Expected string!";
    }
}
func is_valid_target(target: Expression) -> bool {
    match target in {
        case Variable(_) return true;
        case FieldAccess(inner, _) return is_valid_target(inner);
        case ArrayAccess(inner, _) return is_valid_target(inner);
        case _ assert false, "Unimplemented is_valid_target %s" % target;
    }
}
func parse_statement(lexer: Lexer) -> Statement {
    if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Enum))) do {
        enum_name_: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.EnumDefinition(enum_name_, []);
        variants: EnumEntry[] = [parse_enum_entry(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) variants = variants + [parse_enum_entry(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.EnumDefinition(enum_name_, variants);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Struct))) do {
        struct_name_: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) return Statement.StructDefinition(struct_name_, []);
        members: TypeDeclaration[] = [parse_type_declaration(lexer)];
        while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) members = members + [parse_type_declaration(lexer)];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.StructDefinition(struct_name_, members);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Func))) do {
        func_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Open));
        params: TypeDeclaration[] = [];
        if !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Close))) do {
            params = params + [parse_type_declaration(lexer)];
            while is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma))) params = params + [parse_type_declaration(lexer)];
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Close));
        }
        ret_type: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Arrow))) ret_type = OptionalTypeExpression.Some(parse_type(lexer));
        func_body: Statement = parse_statement(lexer);
        return Statement.FunctionDefinition(func_name, params, ret_type, func_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.If))) do {
        if_condition: Expression = parse_expression(lexer);
        then_branch: Statement = parse_statement(lexer);
        else_branch: OptionalStatement = is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Else)))? OptionalStatement.Some(parse_statement(lexer)): OptionalStatement.None;
        return Statement.If(if_condition, then_branch, else_branch);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Match))) do {
        scrutinee: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.OpenCurly));
        branches: (Expression, Statement)[] = [];
        while is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Case))) branches = branches + [(parse_expression(lexer), parse_statement(lexer))];
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.CloseCurly));
        return Statement.Match(scrutinee, branches);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Assert))) do {
        asserted: Expression = parse_expression(lexer);
        failure_message: OptionalExpression = is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Comma)))? OptionalExpression.Some(parse_expression(lexer)): OptionalExpression.None;
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Assert(asserted, failure_message);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Do))) do {
        loop_body: Statement = parse_statement(lexer);
        loop_condition: OptionalExpression = OptionalExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
            loop_condition = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        }
        return Statement.DoWhile(loop_body, loop_condition);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.While))) do {
        return Statement.While(parse_expression(lexer), parse_statement(lexer));
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.For))) do {
        binder: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Keyword(Keyword.In));
        iterable: Expression = parse_expression(lexer);
        for_body: Statement = parse_statement(lexer);
        return Statement.ForLoop(binder, iterable, for_body);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Continue))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Continue;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Break))) do {
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Break;
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Import))) do {
        imported: Expression = parse_expression(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.Import(imported);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Keyword(Keyword.Type))) do {
        alias_name: str = parse_identifier(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Equal));
        aliased: TypeExpression = parse_type(lexer);
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        return Statement.TypeDefinition(alias_name, aliased);
    } else if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.OpenCurly))) do {
        block: Statement[] = [];
        while !is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.CloseCurly))) block = block + [parse_statement(lexer)];
        return Statement.Statements(block);
    } else {
        target: Expression = parse_expression(lexer);
        declared: OptionalTypeExpression = OptionalTypeExpression.None;
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Colon))) do {
            match target in {
                case Variable(_) declared = OptionalTypeExpression.Some(parse_type(lexer));
                case _ assert false, "Invalid target";
            }
        }
        if is_some_token(lexer_take_token(lexer, TokenContents.Symbol(Symbol.Equal))) do {
            assert is_valid_target(target), "Invalid target!";
            assigned: Expression = parse_expression(lexer);
            lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
            return Statement.Assignment(target, assigned, declared);
        }
        lexer_assert_token(lexer, TokenContents.Symbol(Symbol.Semicolon));
        match target in {
            case Variable(name_) match declared in {
                case Some(declared_type) return Statement.TypeDeclaration(TypeDeclaration{name=name_, type_=declared_type});
            }
        }
        return Statement.Expression(target);
    }
}

383
ppp_parser.py Normal file
View File

@ -0,0 +1,383 @@
from typing import Callable, Dict, List, Optional, Tuple
from ppp_lexer import Lexer
from ppp_tokens import IdentifierToken, Keyword, KeywordToken, NumberToken, StringToken, Symbol, SymbolToken
from ppp_ast import *
def parse_identifier(lexer: Lexer) -> str:
    # Consume the next token, which must be an identifier, and unwrap its name.
    token = lexer.assert_tokenkind(IdentifierToken)
    assert isinstance(token.contents, IdentifierToken)
    return token.contents.identifier
def parse_number(lexer: Lexer) -> int:
    """Consume the next token, which must be a number literal, and return its value."""
    token = lexer.assert_tokenkind(NumberToken)
    contents = token.contents
    assert isinstance(contents, NumberToken)
    return contents.number
def parse_string(lexer: Lexer) -> str:
    """Consume the next token, which must be a string literal, and return its text."""
    token = lexer.assert_tokenkind(StringToken)
    contents = token.contents
    assert isinstance(contents, StringToken)
    return contents.string
def parse_type_primary(lexer: Lexer) -> TypeExpression:
    """Parse a non-arrow type: a parenthesized tuple/union, '[T]' list sugar,
    or a named type — followed by any number of postfix suffixes:
    '[]' (list), '[N]' (fixed-size array), '<...>' or '[...]' (generics)."""
    base_type: TypeExpression
    if lexer.take_token(SymbolToken(Symbol.Open)):
        # '()' is the empty tuple type.
        if lexer.take_token(SymbolToken(Symbol.Close)): return TupleTypeExpr([])
        # Inside parens, '|' binds tighter than ',': each comma-separated
        # element may itself be a union of several types.
        def parse_union(lexer: Lexer) -> TypeExpression:
            union_types: List[TypeExpression] = [parse_type(lexer)]
            while lexer.take_token(SymbolToken(Symbol.Pipe)):
                union_types.append(parse_type(lexer))
            if len(union_types) == 1:
                return union_types[0]
            return UnionTypeExpr(union_types)
        types: List[TypeExpression] = [parse_union(lexer)]
        while lexer.take_token(SymbolToken(Symbol.Comma)):
            types.append(parse_union(lexer))
        lexer.assert_token(SymbolToken(Symbol.Close))
        if len(types) == 1 and isinstance(types[0], UnionTypeExpr):
            # A lone union keeps its own node. NOTE(review): a single
            # parenthesized non-union type such as (int) still becomes a
            # one-element TupleTypeExpr below — confirm that is intended.
            base_type = types[0]
        else:
            base_type = TupleTypeExpr(types)
    elif lexer.take_token(SymbolToken(Symbol.OpenSquare)):
        # '[T]' is sugar for a list of T.
        type = parse_type(lexer)
        lexer.assert_token(SymbolToken(Symbol.CloseSquare))
        base_type = ListTypeExpr(type)
    else:
        name = parse_identifier(lexer)
        base_type = TypeName(name)
    # Postfix suffixes: '[' starts a list/array suffix, '<' generic arguments.
    while (opening_token := lexer.take_tokens(SymbolToken(Symbol.OpenSquare), SymbolToken(Symbol.Left))):
        assert isinstance(opening_token.contents, SymbolToken)
        opening = opening_token.contents.symbol
        # '[N]' with a number literal is a fixed-size array type.
        if opening == Symbol.OpenSquare and lexer.check_tokenkind(NumberToken):
            number = parse_number(lexer)
            lexer.assert_token(SymbolToken(Symbol.CloseSquare))
            base_type = ArrayTypeExpr(base_type, number)
            continue
        opening2closing_map: Dict[Symbol, Symbol] = {
            Symbol.OpenSquare: Symbol.CloseSquare,
            Symbol.Left: Symbol.Right
        }
        assert opening in opening2closing_map, "Unreachable"
        closing = opening2closing_map[opening]
        # An immediately-closed '[]' is the list-type suffix.
        if opening == Symbol.OpenSquare and lexer.take_token(SymbolToken(closing)):
            base_type = ListTypeExpr(base_type)
            continue
        # Otherwise the bracket pair encloses generic type arguments.
        generics: List[TypeExpression] = [parse_type(lexer)]
        while lexer.take_token(SymbolToken(Symbol.Comma)): generics.append(parse_type(lexer))
        lexer.assert_token(SymbolToken(closing))
        # Generic application cannot be applied twice in a row.
        assert not isinstance(base_type, TypeSpecification)
        base_type = TypeSpecification(base_type, generics)
    return base_type
def parse_type(lexer: Lexer) -> TypeExpression:
    """Parse a full type expression, handling right-associative '->' arrows.

    A tuple on the left of an arrow supplies the argument list, so
    '(int, str) -> bool' is a two-argument function type.
    """
    left = parse_type_primary(lexer)
    if not lexer.take_token(SymbolToken(Symbol.Arrow)):
        return left
    # Recurse for the result so 'a -> b -> c' parses as 'a -> (b -> c)'.
    result = parse_type(lexer)
    if isinstance(left, TupleTypeExpr):
        argument_types = left.types
    else:
        argument_types = [left]
    return FunctionTypeExpr(argument_types, result)
def parse_type_declaration(lexer: Lexer) -> TypeDeclaration:
    """Parse a 'name: type' pair, as used for fields and parameters."""
    declared_name = parse_identifier(lexer)
    lexer.assert_token(SymbolToken(Symbol.Colon))
    return TypeDeclaration(declared_name, parse_type(lexer))
def parse_enum_entry(lexer: Lexer) -> EnumEntry:
    """Parse one enum entry: a name, optionally followed by '(T1, T2, ...)'
    payload types."""
    name = parse_identifier(lexer)
    if not lexer.take_token(SymbolToken(Symbol.Open)):
        # Bare entry with no payload.
        return EnumEntry(name, [])
    payload_types: List[TypeExpression] = [parse_type(lexer)]
    while lexer.take_token(SymbolToken(Symbol.Comma)):
        payload_types.append(parse_type(lexer))
    lexer.assert_token(SymbolToken(Symbol.Close))
    return EnumEntry(name, payload_types)
def parse_primary(lexer: Lexer) -> Expression:
    """Parse an atom — parenthesized/tuple expression, list or dict literal
    (including comprehensions), string, number, or variable — followed by any
    chain of postfix operations: '.' field, '(' call, '[' index, '{' struct."""
    base_expression: Expression
    if lexer.take_token(SymbolToken(Symbol.Open)):
        if lexer.take_token(SymbolToken(Symbol.Close)):
            # '()' is the empty tuple.
            base_expression = TupleExpr([])
        else:
            elements: List[Expression] = [parse_expression(lexer)]
            singleton: bool = False
            while lexer.take_token(SymbolToken(Symbol.Comma)):
                # '(x,)' — a trailing comma marks a one-element tuple.
                if lexer.check_token(SymbolToken(Symbol.Close)) and len(elements) == 1:
                    singleton = True
                    break
                elements.append(parse_expression(lexer))
            lexer.assert_token(SymbolToken(Symbol.Close))
            if singleton or len(elements) > 1:
                base_expression = TupleExpr(elements)
            else:
                # Plain parenthesized expression unwraps to its contents.
                base_expression = elements[0]
    elif lexer.take_token(SymbolToken(Symbol.OpenSquare)):
        if lexer.take_token(SymbolToken(Symbol.CloseSquare)):
            base_expression = Array([])
        else:
            expressions: List[Expression] = [parse_expression(lexer)]
            if lexer.take_token(KeywordToken(Keyword.For)):
                # '[expr for var in iterable]' — list comprehension.
                variable = parse_identifier(lexer) # TODO: Pattern matching
                lexer.assert_token(KeywordToken(Keyword.In))
                expression = parse_expression(lexer)
                lexer.assert_token(SymbolToken(Symbol.CloseSquare))
                base_expression = LoopComprehension(expressions[0], variable, expression)
            else:
                while lexer.take_token(SymbolToken(Symbol.Comma)):
                    expressions.append(parse_expression(lexer))
                lexer.assert_token(SymbolToken(Symbol.CloseSquare))
                base_expression = Array(expressions)
    elif lexer.take_token(SymbolToken(Symbol.OpenCurly)):
        if lexer.take_token(SymbolToken(Symbol.CloseCurly)):
            base_expression = DictionaryExpr([])
        else:
            # Each entry is 'key: value'.
            def parse_dict_entry() -> Tuple[Expression, Expression]:
                key = parse_expression(lexer)
                lexer.assert_token(SymbolToken(Symbol.Colon))
                return (key, parse_expression(lexer))
            dict_entries: List[Tuple[Expression, Expression]] = [parse_dict_entry()]
            if lexer.take_token(KeywordToken(Keyword.For)):
                # '{k: v for var in iterable}' — dict comprehension.
                variable = parse_identifier(lexer) # TODO: Pattern matching
                lexer.assert_token(KeywordToken(Keyword.In))
                expression = parse_expression(lexer)
                lexer.assert_token(SymbolToken(Symbol.CloseCurly))
                base_expression = DictComprehension(dict_entries[0], variable, expression)
            else:
                while lexer.take_token(SymbolToken(Symbol.Comma)):
                    dict_entries.append(parse_dict_entry())
                lexer.assert_token(SymbolToken(Symbol.CloseCurly))
                base_expression = DictionaryExpr(dict_entries)
    elif lexer.check_tokenkind(StringToken):
        base_expression = String(parse_string(lexer))
    elif lexer.check_tokenkind(NumberToken):
        base_expression = Number(parse_number(lexer))
    else:
        base_expression = Variable(parse_identifier(lexer))
    # Postfix chain; keeps wrapping base_expression until none of the four
    # opening tokens follows.
    while (token := lexer.take_tokens(SymbolToken(Symbol.Open), SymbolToken(Symbol.OpenSquare), SymbolToken(Symbol.Dot), SymbolToken(Symbol.OpenCurly))):
        match token.contents:
            case SymbolToken(symbol):
                match symbol:
                    case Symbol.Dot:
                        # Field access: expr.field
                        field = parse_identifier(lexer)
                        base_expression = FieldAccess(base_expression, field)
                    case Symbol.Open:
                        # Function call: expr(args...)
                        if lexer.take_token(SymbolToken(Symbol.Close)):
                            base_expression = FunctionCall(base_expression, [])
                        else:
                            arguments: List[Expression] = [parse_expression(lexer)]
                            while lexer.take_token(SymbolToken(Symbol.Comma)):
                                arguments.append(parse_expression(lexer))
                            lexer.assert_token(SymbolToken(Symbol.Close))
                            base_expression = FunctionCall(base_expression, arguments)
                    case Symbol.OpenSquare:
                        # Indexing: expr[index]
                        index = parse_expression(lexer)
                        lexer.assert_token(SymbolToken(Symbol.CloseSquare))
                        base_expression = ArrayAccess(base_expression, index)
                    case Symbol.OpenCurly:
                        # Struct instantiation: expr{field=value, ...}
                        if lexer.take_token(SymbolToken(Symbol.CloseCurly)):
                            base_expression = StructInstantiation(base_expression, [])
                        else:
                            def parse_argument() -> Tuple[str, Expression]:
                                parameter = parse_identifier(lexer)
                                lexer.assert_token(SymbolToken(Symbol.Equal))
                                return (parameter, parse_expression(lexer))
                            struct_arguments: List[Tuple[str, Expression]] = [parse_argument()]
                            while lexer.take_token(SymbolToken(Symbol.Comma)): struct_arguments.append(parse_argument())
                            lexer.assert_token(SymbolToken(Symbol.CloseCurly))
                            base_expression = StructInstantiation(base_expression, struct_arguments)
                    case _: assert False, ("Unimplemented", symbol)
            case _: assert False, ("Unimplemented", token)
    return base_expression
def parse_unary(lexer: Lexer) -> Expression:
    """Parse prefix unary operators (~ ! + - return), recursing for the
    operand; falls through to parse_primary when no prefix is present."""
    for prefix_token, build in (
        (SymbolToken(Symbol.Tilde), Bnot),
        (SymbolToken(Symbol.Exclamation), Not),
        (SymbolToken(Symbol.Plus), UnaryPlus),
        (SymbolToken(Symbol.Dash), UnaryMinus),
        (KeywordToken(Keyword.Return), Return),
    ):
        if lexer.take_token(prefix_token):
            return build(parse_unary(lexer))
    return parse_primary(lexer)
# Binary-operator table grouped by precedence level: index 0 binds loosest
# (logical or), the last level tightest (*, /, %). Each level maps an
# operator symbol to the AST-node constructor for it;
# parse_expression_at_level recurses one level per index, making every
# operator left-associative within its level.
Precedence = Dict[Symbol, Callable[[Expression, Expression], Expression]]
precedences: List[Precedence] = [
    {Symbol.Dpipe: Or},
    {Symbol.Dampersand: And},
    {Symbol.Pipe: Bor},
    {Symbol.Carot: Bxor},
    {Symbol.Ampersand: Band},
    {Symbol.Dequal: Equal, Symbol.NotEqual: NotEqual},
    {Symbol.Left: LessThan, Symbol.Right: GreaterThan, Symbol.LesserEqual: LessThanOrEqual, Symbol.GreaterEqual: GreaterThanOrEqual},
    {Symbol.Dleft: ShiftLeft, Symbol.Dright: ShiftRight},
    {Symbol.Plus: Addition, Symbol.Dash: Subtract},
    {Symbol.Asterisk: Multiplication, Symbol.Slash: Division, Symbol.Percent: Modulo}
]
def parse_expression_at_level(lexer: Lexer, level: int=0) -> Expression:
    """Precedence-climbing parser for the left-associative binary operators
    listed in `precedences`; below the last level it parses a unary."""
    if level >= len(precedences):
        return parse_unary(lexer)
    operators = precedences[level]
    candidates = [SymbolToken(symbol) for symbol in operators]
    expression = parse_expression_at_level(lexer, level+1)
    while (token := lexer.take_tokens(*candidates)):
        contents = token.contents
        assert isinstance(contents, SymbolToken)
        right_operand = parse_expression_at_level(lexer, level+1)
        expression = operators[contents.symbol](expression, right_operand)
    return expression
def parse_ternary(lexer: Lexer) -> Expression:
    """Parse 'cond ? a : b'; the false branch recurses into parse_ternary, so
    the operator is right-associative."""
    condition = parse_expression_at_level(lexer)
    if not lexer.take_token(SymbolToken(Symbol.QuestionMark)):
        return condition
    true_branch = parse_expression_at_level(lexer)
    lexer.assert_token(SymbolToken(Symbol.Colon))
    false_branch = parse_ternary(lexer)
    return Ternary(condition, true_branch, false_branch)
def parse_expression(lexer: Lexer) -> Expression:
    """Parse a full expression: a 'return' value, a lambda literal
    'lambda params => body', or a ternary chain."""
    if lexer.take_token(KeywordToken(Keyword.Return)):
        return Return(parse_expression(lexer))
    if not lexer.take_token(KeywordToken(Keyword.Lambda)):
        return parse_ternary(lexer)
    # Lambda: zero or more 'name: type' parameters terminated by '=>'.
    parameters: List[TypeDeclaration] = []
    if not lexer.take_token(SymbolToken(Symbol.EqualArrow)):
        parameters.append(parse_type_declaration(lexer))
        while lexer.take_token(SymbolToken(Symbol.Comma)):
            parameters.append(parse_type_declaration(lexer))
        lexer.assert_token(SymbolToken(Symbol.EqualArrow))
    return Lambda(parameters, parse_expression(lexer))
def is_valid_target(expression: Expression) -> bool:
match expression:
case FieldAccess(subexpression, _): return is_valid_target(subexpression)
case Variable(_): return True
case ArrayAccess(array, _): return is_valid_target(array)
case _: assert False, ("Unimplemeneted", expression)
assert False, "Unreachable"
def parse_statement(lexer: Lexer) -> Statement:
    """Parse one statement. Dispatches on a leading keyword (enum, struct,
    func, if, while, do, match, assert, for, import, type, ...); anything
    else is a brace block, an assignment/declaration, or a bare expression
    statement terminated by ';'."""
    if lexer.take_token(KeywordToken(Keyword.Enum)):
        # enum Name { Entry, Entry(T, ...), ... }
        enum_name = parse_identifier(lexer)
        lexer.assert_token(SymbolToken(Symbol.OpenCurly))
        if lexer.take_token(SymbolToken(Symbol.CloseCurly)): return EnumDefinition(enum_name, [])
        enum_entries: List[EnumEntry] = [parse_enum_entry(lexer)]
        while lexer.take_token(SymbolToken(Symbol.Comma)):
            enum_entries.append(parse_enum_entry(lexer))
        lexer.assert_token(SymbolToken(Symbol.CloseCurly))
        return EnumDefinition(enum_name, enum_entries)
    elif lexer.take_token(KeywordToken(Keyword.Struct)):
        # struct Name { field: type, ... }
        struct_name = parse_identifier(lexer)
        lexer.assert_token(SymbolToken(Symbol.OpenCurly))
        if lexer.take_token(SymbolToken(Symbol.CloseCurly)): return StructDefinition(struct_name, [])
        struct_entries: List[TypeDeclaration] = [parse_type_declaration(lexer)]
        while lexer.take_token(SymbolToken(Symbol.Comma)):
            struct_entries.append(parse_type_declaration(lexer))
        lexer.assert_token(SymbolToken(Symbol.CloseCurly))
        return StructDefinition(struct_name, struct_entries)
    elif lexer.take_token(KeywordToken(Keyword.Func)):
        # func name(arg: type, ...) [-> type] body
        function_name = parse_identifier(lexer)
        lexer.assert_token(SymbolToken(Symbol.Open))
        function_arguments: List[TypeDeclaration] = []
        if not lexer.take_token(SymbolToken(Symbol.Close)):
            function_arguments.append(parse_type_declaration(lexer))
            while lexer.take_token(SymbolToken(Symbol.Comma)):
                function_arguments.append(parse_type_declaration(lexer))
            lexer.assert_token(SymbolToken(Symbol.Close))
        function_return_type: Optional[TypeExpression] = None
        if lexer.take_token(SymbolToken(Symbol.Arrow)):
            function_return_type = parse_type(lexer)
        function_body = parse_statement(lexer)
        return FunctionDefinition(function_name, function_arguments, function_return_type, function_body)
    elif lexer.take_token(KeywordToken(Keyword.If)):
        # if cond stmt [else stmt]
        return IfStatement(
            parse_expression(lexer),
            parse_statement(lexer),
            parse_statement(lexer) if lexer.take_token(KeywordToken(Keyword.Else)) else None
        )
    elif lexer.take_token(KeywordToken(Keyword.Else)):
        # A bare 'else' can only appear right after an if's body.
        assert False, "Unmatched else"
    elif lexer.take_token(KeywordToken(Keyword.While)):
        return WhileStatement(
            parse_expression(lexer),
            parse_statement(lexer)
        )
    elif lexer.take_token(KeywordToken(Keyword.Break)):
        lexer.assert_token(SymbolToken(Symbol.Semicolon))
        return BreakStatement()
    elif lexer.take_token(KeywordToken(Keyword.Continue)):
        lexer.assert_token(SymbolToken(Symbol.Semicolon))
        return ContinueStatement()
    elif lexer.take_token(KeywordToken(Keyword.Do)):
        # do stmt [while cond;] — the while clause is optional.
        body = parse_statement(lexer)
        condition: Optional[Expression] = None
        if lexer.take_token(KeywordToken(Keyword.While)):
            condition = parse_expression(lexer)
            lexer.assert_token(SymbolToken(Symbol.Semicolon))
        return DoWhileStatement(body, condition)
    elif lexer.take_token(KeywordToken(Keyword.Match)):
        # match value in { case pattern stmt ... }
        value = parse_expression(lexer)
        lexer.assert_token(KeywordToken(Keyword.In)) # to prevent it from parsing it as a struct instantiation
        lexer.assert_token(SymbolToken(Symbol.OpenCurly))
        cases: List[Tuple[Expression, Statement]] = []
        while lexer.take_token(KeywordToken(Keyword.Case)): cases.append((parse_expression(lexer), parse_statement(lexer)))
        lexer.assert_token(SymbolToken(Symbol.CloseCurly))
        return MatchStatement(value, cases)
    elif lexer.take_token(KeywordToken(Keyword.Assert)):
        # assert cond [, message];
        condition = parse_expression(lexer)
        message = parse_expression(lexer) if lexer.take_token(SymbolToken(Symbol.Comma)) else None
        lexer.assert_token(SymbolToken(Symbol.Semicolon))
        return AssertStatement(condition, message)
    elif lexer.take_token(KeywordToken(Keyword.For)):
        # for var in iterable body
        variable = parse_identifier(lexer) # TODO: Allow for pattern matching here
        lexer.assert_token(KeywordToken(Keyword.In))
        expression = parse_expression(lexer)
        body = parse_statement(lexer)
        return ForLoop(variable, expression, body)
    elif lexer.take_token(KeywordToken(Keyword.Import)):
        # import "path";
        file = parse_expression(lexer)
        lexer.assert_token(SymbolToken(Symbol.Semicolon))
        return Import(file)
    elif lexer.take_token(KeywordToken(Keyword.Type)):
        # type Name = type-expression;
        name = parse_identifier(lexer)
        lexer.assert_token(SymbolToken(Symbol.Equal))
        type_expression = parse_type(lexer)
        lexer.assert_token(SymbolToken(Symbol.Semicolon))
        return TypeDefinition(name, type_expression)
    elif lexer.check_tokenkind(KeywordToken) and not lexer.check_tokens(KeywordToken(Keyword.Return), KeywordToken(Keyword.Lambda)):
        # Unknown statement keyword. NOTE(review): each next_token() call
        # presumably consumes a token, so the message shows three successive
        # tokens rather than the same one thrice — confirm against Lexer.
        assert False, ("Unimplemented", lexer.next_token(), lexer.next_token(), lexer.next_token())
    elif lexer.take_token(SymbolToken(Symbol.OpenCurly)):
        # { stmt* } — block statement.
        statements: List[Statement] = []
        while not lexer.take_token(SymbolToken(Symbol.CloseCurly)):
            statements.append(parse_statement(lexer))
        return Statements(statements)
    else:
        # Expression statement, optionally with a ': type' declaration and/or
        # an '= expr' assignment.
        expression = parse_expression(lexer)
        type: Optional[TypeExpression] = None
        if lexer.take_token(SymbolToken(Symbol.Colon)):
            assert isinstance(expression, Variable), "Cannot declare types for anything besides a variable"
            type = parse_type(lexer)
        if lexer.take_token(SymbolToken(Symbol.Equal)):
            assert is_valid_target(expression), ("Invalid target!", expression)
            right_expression = parse_expression(lexer)
            lexer.assert_token(SymbolToken(Symbol.Semicolon))
            return Assignment(expression, right_expression, type)
        lexer.assert_token(SymbolToken(Symbol.Semicolon))
        if type and isinstance(expression, Variable):
            # 'name: type;' with no initializer declares without assigning.
            return TypeDeclarationStatement(TypeDeclaration(expression.name, type))
        return ExpressionStatement(expression)

7
ppp_stdlib.ppp Normal file
View File

@ -0,0 +1,7 @@
import "ppp_object.ppp";
import "ppp_types.ppp";
variables: dict[str, Object] = {
"str": Object.Type(Type.Str),
"bool": Object.Type(Type.Bool)
};

110
ppp_stdlib.py Normal file
View File

@ -0,0 +1,110 @@
from typing import Callable, Dict, List, Tuple
from ppp_ast import Statements
from ppp_object import Bool, EnumValue, Int, Object, Function, Str, TypeObject, Void, List as ListObject
from ppp_types import Bool as BoolType, DictionaryType, FunctionType, GenericType, Int as IntType, Str as StrType, Type, TypeType, VariableType, Void as VoidType, Object as ObjectType, UnionType, ListType
def PythonFunction(name: str, parameters: List[Tuple[str, Type]], return_type: Type, func: Callable[..., Object]) -> Object:
    """Wrap a native Python callable as an interpreter Function object."""
    parameter_types = [declared_type for _, declared_type in parameters]
    # The interpreter passes four context arguments before the real ones;
    # drop them and forward the rest to the Python implementation.
    def invoke(_0, _1, _2, _3, *arguments): return func(*arguments)
    return Function(FunctionType(parameter_types, return_type), (name, parameters, return_type, Statements([]), invoke))
def print_impl(str_: Object) -> Object:
    """Native `print`: write a Str object's text with no trailing newline."""
    assert isinstance(str_, Str)
    text = str_.str
    print(text, end='')
    return Void
Print = PythonFunction("print", [('string', StrType)], VoidType, print_impl)
def int_to_str_impl(int_: Object) -> Object:
    """Native `int_to_str`: render an Int object's value as a Str."""
    assert isinstance(int_, Int)
    rendered = str(int_.num)
    return Str(rendered)
IntToStr = PythonFunction("int_to_str", [('integer', IntType)], StrType, int_to_str_impl)
def debug_print_impl(obj: Object) -> Object:
    """Native `debug_print`: dump the raw Python repr of any interpreter object."""
    print(obj)
    return Void
DebugPrint = PythonFunction("debug_print", [('object', ObjectType)], VoidType, debug_print_impl)
def read_impl(str_: Object) -> Object:
    """Native `read`: return the entire contents of the file at the given path."""
    assert isinstance(str_, Str)
    with open(str_.str) as f:
        return Str(f.read())
Read = PythonFunction("read", [('file_path', StrType)], StrType, read_impl)
def len_impl(list_: Object) -> Object:
assert list_.get_type().is_indexable(), list_
match list_:
case Str(str): return Int(len(str))
case ListObject(_, list): return Int(len(list))
case _: assert False, ("Unimplemented", list_)
assert False
Len = PythonFunction("len", [('list', UnionType([ListType(VariableType("")), StrType]))], IntType, len_impl)
def str_to_int_impl(str_: Object) -> Object:
    """Native `str_to_int`: parse a decimal Str into an Int.

    NOTE(review): isdigit() rejects signs and whitespace, so negative
    numbers are not accepted — confirm that is intended.
    """
    assert isinstance(str_, Str)
    text = str_.str
    assert text.isdigit()
    return Int(int(text))
StrToInt = PythonFunction("str_to_int", [('string', StrType)], IntType, str_to_int_impl)
def range_impl(start: Object, end: Object) -> Object:
    """Native `range`: list of Ints from start (inclusive) to end (exclusive)."""
    assert isinstance(start, Int)
    assert isinstance(end, Int)
    elements = [Int(value) for value in range(start.num, end.num)]
    return ListObject(ListType(IntType), elements)
Range = PythonFunction("range", [('start', IntType), ('end', IntType)], ListType(IntType), range_impl)
def join_by_impl(seperator: Object, list: Object) -> Object:
    """Native `join_by`: concatenate a list of Strs with the separator
    between consecutive elements; an empty list yields the empty string."""
    assert isinstance(seperator, Str)
    assert isinstance(list, ListObject)
    if len(list.list) == 0: return Str("")
    # Only check the element type when the list is non-empty.
    assert list.type.type.is_subtype_of(StrType), list
    pieces: List[str] = []
    for element in list.list:
        assert isinstance(element, Str)
        pieces.append(element.str)
    return Str(seperator.str.join(pieces))
JoinBy = PythonFunction("join_by", [('seperator', StrType), ('list', ListType(StrType))], StrType, join_by_impl)
def id_impl(obj: Object) -> Object:
    """Native `id`: identity of an interpreter object (only enum values are
    supported so far); uses CPython's id() of the wrapper object."""
    match obj:
        case EnumValue(_, _, _): return Int(id(obj))
        case _: assert False, ("Unimplemented", obj)
Id = PythonFunction("id", [('object', ObjectType)], IntType, id_impl)
# Singleton type objects exposed to ppp programs as built-in type names.
StrTypeObj = TypeObject(StrType)
IntTypeObj = TypeObject(IntType)
VoidTypeObj = TypeObject(VoidType)
BoolTypeObj = TypeObject(BoolType)
# `dict` is generic over key and value type variables K and V.
DictTypeObj = TypeObject(GenericType([VariableType("K"), VariableType("V")], DictionaryType(VariableType("K"), VariableType("V"))))
True_ = Bool(True)
False_ = Bool(False)
NoneObj = Void
# The global environment every ppp program starts with: built-in values,
# types, and native functions, keyed by their source-level names.
variables: Dict[str, Object] = {
    'print': Print,
    'true': True_,
    'false': False_,
    'int_to_str': IntToStr,
    'str': StrTypeObj,
    'int': IntTypeObj,
    'bool': BoolTypeObj,
    'void': VoidTypeObj,
    'dict': DictTypeObj,
    'debug_print': DebugPrint,
    'read': Read,
    'len': Len,
    'str_to_int': StrToInt,
    'none': NoneObj,
    'range': Range,
    'join_by': JoinBy,
    'id': Id
}

237
ppp_tokens.ppp Normal file
View File

@ -0,0 +1,237 @@
enum Keyword {
Enum,
Struct,
Func,
If,
Else,
While,
Break,
Continue,
Do,
For,
To,
In,
Match,
Case,
Assert,
Return,
Lambda,
Import,
Type
}
enum OptionalKeyword {
Some(Keyword),
None
}
func keyword_from_str(keyword: str) -> OptionalKeyword {
if keyword == "enum" return OptionalKeyword.Some(Keyword.Enum);
if keyword == "struct" return OptionalKeyword.Some(Keyword.Struct);
if keyword == "func" return OptionalKeyword.Some(Keyword.Func);
if keyword == "if" return OptionalKeyword.Some(Keyword.If);
if keyword == "else" return OptionalKeyword.Some(Keyword.Else);
if keyword == "while" return OptionalKeyword.Some(Keyword.While);
if keyword == "break" return OptionalKeyword.Some(Keyword.Break);
if keyword == "continue" return OptionalKeyword.Some(Keyword.Continue);
if keyword == "do" return OptionalKeyword.Some(Keyword.Do);
if keyword == "for" return OptionalKeyword.Some(Keyword.For);
if keyword == "to" return OptionalKeyword.Some(Keyword.To);
if keyword == "in" return OptionalKeyword.Some(Keyword.In);
if keyword == "match" return OptionalKeyword.Some(Keyword.Match);
if keyword == "case" return OptionalKeyword.Some(Keyword.Case);
if keyword == "assert" return OptionalKeyword.Some(Keyword.Assert);
if keyword == "return" return OptionalKeyword.Some(Keyword.Return);
if keyword == "lambda" return OptionalKeyword.Some(Keyword.Lambda);
if keyword == "import" return OptionalKeyword.Some(Keyword.Import);
if keyword == "type" return OptionalKeyword.Some(Keyword.Type);
return OptionalKeyword.None;
}
func keyword_to_str(keyword: Keyword) -> str {
match keyword in {
case Enum return "enum";
case Struct return "struct";
case Func return "func";
case If return "if";
case Else return "else";
case While return "while";
case Break return "break";
case Continue return "continue";
case Do return "do";
case For return "for";
case To return "to";
case In return "in";
case Match return "match";
case Case return "case";
case Assert return "assert";
case Return return "return";
case Lambda return "lambda";
case Import return "import";
case Type return "type";
}
assert false, "Invalid keyword";
}
enum Symbol {
Open,
Close,
OpenCurly,
CloseCurly,
Comma,
OpenSquare,
CloseSquare,
Colon,
Left,
Right,
Arrow,
Semicolon,
Equal,
Dequal,
Exclamation,
NotEqual,
Dot,
Plus,
Dash,
Asterisk,
Dasterisk,
Slash,
QuestionMark,
Ampersand,
Dampersand,
Pipe,
Dpipe,
Dleft,
Dright,
GreaterEqual,
LesserEqual,
Percent,
Tilde,
Carot
}
enum OptionalSymbol {
Some(Symbol),
None
}
func symbol_from_str(symbol: str) -> OptionalSymbol {
if symbol == "(" return OptionalSymbol.Some(Symbol.Open);
if symbol == ")" return OptionalSymbol.Some(Symbol.Close);
if symbol == "{" return OptionalSymbol.Some(Symbol.OpenCurly);
if symbol == "}" return OptionalSymbol.Some(Symbol.CloseCurly);
if symbol == "," return OptionalSymbol.Some(Symbol.Comma);
if symbol == "[" return OptionalSymbol.Some(Symbol.OpenSquare);
if symbol == "]" return OptionalSymbol.Some(Symbol.CloseSquare);
if symbol == ":" return OptionalSymbol.Some(Symbol.Colon);
if symbol == "<" return OptionalSymbol.Some(Symbol.Left);
if symbol == ">" return OptionalSymbol.Some(Symbol.Right);
if symbol == "->" return OptionalSymbol.Some(Symbol.Arrow);
if symbol == ";" return OptionalSymbol.Some(Symbol.Semicolon);
if symbol == "=" return OptionalSymbol.Some(Symbol.Equal);
if symbol == "==" return OptionalSymbol.Some(Symbol.Dequal);
if symbol == "!" return OptionalSymbol.Some(Symbol.Exclamation);
if symbol == "!=" return OptionalSymbol.Some(Symbol.NotEqual);
if symbol == "." return OptionalSymbol.Some(Symbol.Dot);
if symbol == "+" return OptionalSymbol.Some(Symbol.Plus);
if symbol == "-" return OptionalSymbol.Some(Symbol.Dash);
if symbol == "*" return OptionalSymbol.Some(Symbol.Asterisk);
if symbol == "**" return OptionalSymbol.Some(Symbol.Dasterisk);
if symbol == "/" return OptionalSymbol.Some(Symbol.Slash);
if symbol == "?" return OptionalSymbol.Some(Symbol.QuestionMark);
if symbol == "&" return OptionalSymbol.Some(Symbol.Ampersand);
if symbol == "&&" return OptionalSymbol.Some(Symbol.Dampersand);
if symbol == "|" return OptionalSymbol.Some(Symbol.Pipe);
if symbol == "||" return OptionalSymbol.Some(Symbol.Dpipe);
if symbol == "<<" return OptionalSymbol.Some(Symbol.Dleft);
if symbol == ">>" return OptionalSymbol.Some(Symbol.Dright);
if symbol == ">=" return OptionalSymbol.Some(Symbol.GreaterEqual);
if symbol == "<=" return OptionalSymbol.Some(Symbol.LesserEqual);
if symbol == "%" return OptionalSymbol.Some(Symbol.Percent);
if symbol == "~" return OptionalSymbol.Some(Symbol.Tilde);
if symbol == "^" return OptionalSymbol.Some(Symbol.Carot);
assert false, "Unimplemented symbol '%s'" % symbol;
}
func symbol_to_str(symbol: Symbol) -> str {
match symbol in {
case Open return "(";
case Close return ")";
case OpenCurly return "{";
case CloseCurly return "}";
case Comma return ",";
case OpenSquare return "[";
case CloseSquare return "]";
case Colon return ":";
case Left return "<";
case Right return ">";
case Arrow return "->";
case Semicolon return ";";
case Equal return "=";
case Dequal return "==";
case Exclamation return "!";
case NotEqual return "!=";
case Dot return ".";
case Plus return "+";
case Dash return "-";
case Asterisk return "*";
case Dasterisk return "**";
case Slash return "/";
case QuestionMark return "?";
case Ampersand return "&";
case Dampersand return "&&";
case Pipe return "|";
case Dpipe return "||";
case Dleft return "<<";
case Dright return ">>";
case GreaterEqual return ">=";
case LesserEqual return "<=";
case Percent return "%";
case Tilde return "~";
case Carot return "^";
}
assert false, "Invalid symbol";
}
enum TokenContents {
Keyword(Keyword),
Identifier(str),
Number(int),
String(str),
Symbol(Symbol),
Eof
}
func token_contents_to_str(token: TokenContents) -> str {
match token in {
case Keyword(keyword) return "Keyword(%s)" % keyword_to_str(keyword);
case Identifier(string) return "Identifier(%s)" % string;
case Number(number) return "Number(%d)" % number;
case String(string) return "String(\"%s\")" % string;
case Symbol(symbol) return "Symbol('%s')" % symbol_to_str(symbol);
case Eof return "Eof";
}
}
struct Token {
line: int,
col: int,
value: str,
contents: TokenContents
}
func token_to_str(token: Token) -> str {
return token_contents_to_str(token.contents)+":"+int_to_str(token.line)+":"+int_to_str(token.col);
}
enum OptionalToken {
Some(Token),
None
}
func is_some_token(maybe_token: OptionalToken) -> bool {
match maybe_token in {
case Some(_) return true;
case None return false;
}
}

104
ppp_tokens.py Normal file
View File

@ -0,0 +1,104 @@
from dataclasses import dataclass
from enum import Enum
from typing import List, Literal, Tuple, Union
class Keyword(Enum):
    """Reserved words of the ppp language; each member's value is its exact
    source spelling, so `Keyword(text)` maps a lexeme to its keyword."""
    Enum = 'enum'
    Struct = 'struct'
    Func = 'func'
    If = 'if'
    Else = 'else'
    While = 'while'
    Break = 'break'
    Continue = 'continue'
    Do = 'do'
    For = 'for'
    To = 'to'
    In = 'in'
    Match = 'match'
    Case = 'case'
    Assert = 'assert'
    Return = 'return'
    Lambda = 'lambda'
    Import = 'import'
    Type = 'type'
class Symbol(Enum):
    """Punctuation and operator lexemes of ppp; each member's value is its
    exact source spelling. Multi-character symbols (==, ->, <<, ...) are
    their own members rather than compositions of single characters."""
    Open = '('
    Close = ')'
    OpenCurly = '{'
    CloseCurly = '}'
    Comma = ','
    OpenSquare = '['
    CloseSquare = ']'
    Colon = ':'
    Left = '<'
    Right = '>'
    Arrow = '->'
    EqualArrow = '=>'
    Semicolon = ';'
    Equal = '='
    Dequal = '=='
    Exclamation = '!'
    NotEqual = '!='
    Dot = '.'
    Plus = '+'
    Dash = '-'
    Asterisk = '*'
    Dasterisk = '**'
    Slash = '/'
    QuestionMark = '?'
    Ampersand = '&'
    Dampersand = '&&'
    Pipe = '|'
    Dpipe = '||'
    Dleft = '<<'
    Dright = '>>'
    GreaterEqual = '>='
    LesserEqual = '<='
    Percent = '%'
    Tilde = '~'
    Carot = '^'
@dataclass
class KeywordToken:
    """Payload of a token holding a reserved word."""
    keyword: Keyword
@dataclass
class IdentifierToken:
    """Payload of a token holding a name."""
    identifier: str
@dataclass
class NumberToken:
    """Payload of a token holding an integer literal."""
    number: int
@dataclass
class StringToken:
    """Payload of a token holding a string literal."""
    string: str
@dataclass
class SymbolToken:
    """Payload of a token holding punctuation or an operator.

    __hash__ is defined explicitly: @dataclass with the default eq=True would
    otherwise set __hash__ to None, and SymbolToken instances must stay
    hashable for the parser's uses.
    """
    symbol: Symbol
    def __hash__(self) -> int:
        return hash(('symbol', self.symbol))
# Marks the end of the token stream; carries no data.
@dataclass
class EofToken: pass
# A token's payload is exactly one of these alternatives.
TokenContents = Union[
    KeywordToken,
    IdentifierToken,
    NumberToken,
    StringToken,
    SymbolToken,
    EofToken
]
@dataclass
class Token:
    """A lexed token: its source position, raw text, and decoded payload."""
    # Source position where the token starts (numbering convention is set by
    # the lexer, which is not visible here).
    line: int
    col: int
    # The exact source substring that produced this token.
    value: str
    # The decoded payload; see TokenContents.
    contents: TokenContents

130
ppp_types.ppp Normal file
View File

@ -0,0 +1,130 @@
enum Type {
Int,
Str,
Bool,
Void,
Type,
Tuple(Type[]),
List(Type),
Array(Type, int),
Function(Type[], Type),
Union(Type[]),
Object,
Return(Type),
Enum(str, dict[str, Type[]], Type[]),
EnumStruct(Type, str),
Struct(str, dict[str, Type], Type[]),
Variable(str)
}
func type_represent(type_: Type) -> str {
match type_ in {
case Int return "int";
case Str return "str";
case Bool return "bool";
case Void return "void";
case Type return "type";
case Function(arguments, return_type) return "(%s) -> %s" % (join_by(", ", [type_represent(type_) for type_ in arguments]), type_represent(return_type));
case Enum(name, _, generics) do {
assert len(generics) == 0;
return name;
}
case Variable(name) return name + "?";
case Struct(name, _, generics) do {
assert len(generics) == 0;
return name;
}
case List(type_) return type_represent(type_) + "[]";
case Tuple(types) return "(" + join_by(", ", [type_represent(type_) for type_ in types]) + ")";
case _ do {
debug_print(type_);
assert false, "type_represent unimplemented";
}
}
}
func type_eq(a_type: Type, b_type: Type) -> bool return type_is_subtype_of(a_type, b_type) && type_is_subtype_of(b_type, a_type);
func type_is_subtype_of(type_a: Type, type_b: Type) -> bool {
    match type_a in {
        case Type match type_b in {
            case Type return true;
            case _ return false;
        }
        case Function(a_arguments, a_return_type) match type_b in {
            case Function(b_arguments, b_return_type) do {
                assert len(a_arguments) == len(b_arguments);
                for i in range(0, len(a_arguments)) if !type_is_subtype_of(b_arguments[i], a_arguments[i]) return false;
                return type_is_subtype_of(a_return_type, b_return_type);
            }
            case _ assert false;
        }
        case Str match type_b in {
            case Str return true;
            case _ return false;
        }
        case Enum(a_name, _, a_generics) match type_b in {
            case Enum(b_name, _, b_generics) do {
                assert len(a_generics) == 0;
                assert len(b_generics) == 0;
                return a_name == b_name;
            }
            case _ return false;
        }
        case Struct(a_name, a_members, a_generics) match type_b in {
            case Struct(b_name, b_members, b_generics) do {
                assert len(a_generics) == 0;
                assert len(b_generics) == 0;
                return a_name == b_name;
            }
            case _ return false;
        }
        case Bool match type_b in {
            case Bool return true;
            case _ return false;
        }
        case List(a_type) match type_b in {
            case List(b_type) do {
                match a_type in { case Variable(_) return true; }
                match b_type in { case Variable(_) return true; }
                return type_eq(a_type, b_type);
            }
            case _ return false;
        }
        case Tuple(a_types) match type_b in {
            case Tuple(b_types) do {
                if len(a_types) != len(b_types) return false;
                for i in range(0, len(a_types)) if !type_is_subtype_of(a_types[i], b_types[i]) return false;
                return true;
            }
            case _ return false;
        }
        case _ assert false, "Unimplemented type_is_subtype_of %s, %s" % (type_represent(type_a), type_represent(type_b));
    }
}
func type_fill(type_: Type, types: dict[str, Type], stack: int[]) -> Type {
type_id: int = id(type_);
new_stack: int[] = stack+[type_id];
match type_ in {
case Int return Type.Int;
case Str return Type.Str;
case Bool return Type.Bool;
case Enum(name, members, generics) do {
for id_ in stack if id_ == type_id return type_;
assert len(generics) == 0, "Unimplemented type_fill enum generics";
for member in members members[member] = [type_fill(element, types, new_stack) for element in members[member]];
for i in range(0, len(generics)) generics[i] = type_fill(generics[i], types, new_stack);
return type_;
}
case Variable(name) do {
for type_name in types if type_name == name return types[name];
return type_;
}
case Struct(name, members, generics) do {
assert len(generics) == 0;
for field in members members[field] = type_fill(members[field], types, new_stack);
return type_;
}
case List(type_) return Type.List(type_fill(type_, types, new_stack));
case Tuple(types_) return Type.Tuple([type_fill(type_, types, new_stack) for type_ in types_]);
case _ assert false, "Unimplemented type_fill %s" % type_represent(type_);
}
}

306
ppp_types.py Normal file
View File

@ -0,0 +1,306 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Tuple, Union
import sys
sys.setrecursionlimit(1000)
class Type(ABC):
def is_indexable(self) -> bool: return False
def is_subtype_of(self, other: 'Type') -> bool:
match self, other:
case StrType(), StrType(): return True
case TypeType_(), TypeType_(): return True
case FunctionType(self_arguments, self_return_type), FunctionType(other_arguments, other_return_type):
assert len(self_arguments) == len(other_arguments)
for (self_argument, other_argument) in zip(self_arguments, other_arguments):
if not other_argument.is_subtype_of(self_argument): return False
return self_return_type.is_subtype_of(other_return_type)
case EnumType(self_name, self_members), EnumType(other_name, other_members):
# if self_name == other_name: assert self is other, (num_expressions, self, other, self_name, other_name, self_members, other_members)
return self is other
return self_name == other_name
case StructType(_, _), StructType(_, _): return self is other
case VoidType(), VoidType(): return True
case ListType(self_type), ListType(other_type):
# TODO: Maybe return which types match
if isinstance(self_type, VariableType): return True
if isinstance(other_type, VariableType): return True
return self_type == other_type
return self_type.is_subtype_of(other_type)
case TupleType(self_elememts), TupleType(other_elements):
if len(self_elememts) != len(other_elements): return False
for (self_element, other_element) in zip(self_elememts, other_elements):
if not self_element.is_subtype_of(other_element): return False
return True
case IntType(), IntType():
return True
case VariableType(self_name), VariableType(other_name):
return self_name == other_name
case _, VariableType(""): return True
case type, UnionType(types):
for union_type in types:
if type.is_subtype_of(union_type): return True
return False
case BoolType(), BoolType(): return True
case DictionaryType(self_key_type, self_value_type), DictionaryType(other_key_type, other_value_type):
if isinstance(self_key_type, VariableType) and self_key_type.name == "" and isinstance(self_value_type, VariableType) and self_value_type.name == "": return True
return other_key_type.is_subtype_of(self_key_type) and self_value_type.is_subtype_of(other_value_type)
case type, ObjectType(): return True
case type_a, type_b if type_a.__class__ != type_b.__class__: return False
case _, _: assert False, ("Unimplemented", self, other)
assert False, ("Unimplemented", self, other)
def __eq__(self, other):
return isinstance(other, Type) and self.is_subtype_of(other) and other.is_subtype_of(self)
@abstractmethod
def represent(self) -> str: ...
@abstractmethod
def fill(self, types: 'Dict[str, Type]', stack: List[int]) -> 'Type': ...
@abstractmethod
def new_fill(self, types: 'Dict[str, Type]', stack: List[int]) -> 'Tuple[bool, Type]': ...
def new_fill_list(self, type_list: 'List[Type]', types: 'Dict[str, Type]', stack: List[int]) -> 'Tuple[bool, List[Type]]':
new_types = [type.new_fill(types, stack+[id(self)]) for type in type_list]
is_new = any([new_type[0] for new_type in new_types])
return (is_new, [new_type[1] for new_type in new_types])
def new_fill_dict(self, type_dict: 'Dict[str, Type]', types: 'Dict[str, Type]', stack: List[int]) -> 'Tuple[bool, Dict[str, Type]]':
new_types = {field: type_dict[field].new_fill(types, stack+[id(self)]) for field in type_dict}
is_new = any([new_types[field][0] for field in new_types])
return (is_new, {field: new_types[field][1] for field in new_types})
class Primitive(Type):
    """A type with no internal structure: filling type variables is a no-op."""
    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        return self
    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        return (False, self)

class IntType(Primitive):
    """The built-in integer type."""
    def represent(self) -> str:
        return 'int'

Int = IntType()

class StrType(Primitive):
    """The built-in string type; indexable like a list."""
    def is_indexable(self) -> bool:
        return True
    def represent(self) -> str:
        return 'str'

Str = StrType()

class BoolType(Primitive):
    """The built-in boolean type."""
    def represent(self) -> str:
        return 'bool'

Bool = BoolType()

class VoidType(Primitive):
    """The type of statements/functions that produce no value."""
    def represent(self) -> str:
        return 'void'

Void = VoidType()

class TypeType_(Primitive):
    """The type of types themselves."""
    def represent(self) -> str:
        return 'type'

TypeType = TypeType_()
@dataclass
class TupleType(Type):
    """A fixed-length, heterogeneous tuple type."""
    types: List[Type]

    def is_indexable(self) -> bool:
        return True

    def represent(self) -> str:
        inner = ', '.join(element.represent() for element in self.types)
        return '(' + inner + ')'

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack breaks cycles through this node.
        if id(self) in stack:
            return self
        child_stack = stack + [id(self)]
        self.types = [element.fill(types, child_stack) for element in self.types]
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        changed, filled = self.new_fill_list(self.types, types, stack)
        return (changed, TupleType(filled))
@dataclass
class ListType(Type):
    """A homogeneous list type."""
    type: Type

    def is_indexable(self) -> bool:
        return True

    def represent(self) -> str:
        return self.type.represent() + '[]'

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack breaks cycles through this node.
        if id(self) in stack:
            return self
        self.type = self.type.fill(types, stack + [id(self)])
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        assert id(self) not in stack
        changed, element_type = self.type.new_fill(types, stack + [id(self)])
        return (changed, ListType(element_type))
@dataclass
class ArrayType(Type):
    # NOTE(review): work in progress — represent/fill/new_fill remain
    # abstract here, so ArrayType cannot be instantiated yet.
    type: Type    # element type
    number: int   # fixed number of elements
    def is_indexable(self) -> bool: return True
@dataclass
class FunctionType(Type):
    """The type of a function: its argument types plus a return type."""
    arguments: List[Type]
    return_type: Type

    def represent(self) -> str:
        args = ', '.join(argument.represent() for argument in self.arguments)
        return '(' + args + ') -> ' + self.return_type.represent()

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack breaks cycles through this node.
        if id(self) in stack:
            return self
        child_stack = stack + [id(self)]
        self.arguments = [argument.fill(types, child_stack) for argument in self.arguments]
        self.return_type = self.return_type.fill(types, child_stack)
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        # TODO: Wtf? (kept from original) — assumes function types are acyclic.
        assert id(self) not in stack
        args_changed, new_arguments = self.new_fill_list(self.arguments, types, stack)
        ret_changed, new_return_type = self.return_type.new_fill(types, stack + [id(self)])
        return (args_changed or ret_changed, FunctionType(new_arguments, new_return_type))
@dataclass
class UnionType(Type):
    """A type admitting any one of several alternative types."""
    types: List[Type]

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack breaks cycles through this node.
        if id(self) in stack:
            return self
        child_stack = stack + [id(self)]
        self.types = [alternative.fill(types, child_stack) for alternative in self.types]
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        changed, alternatives = self.new_fill_list(self.types, types, stack)
        return (changed, UnionType(alternatives))

    def represent(self) -> str:
        return '(' + '|'.join(alternative.represent() for alternative in self.types) + ')'

class ObjectType(Primitive):
    """Top type: every type is a subtype of object (see Type.is_subtype_of)."""
    def represent(self) -> str:
        return 'object'

Object = ObjectType()
@dataclass
class ReturnType(Type):
    """Wraps an inner type, rendered as return<T> — presumably tags a value
    propagating out of a `return` statement; confirm against the interpreter."""
    type: Type

    def represent(self) -> str:
        return f"return<{self.type.represent()}>"

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack breaks cycles through this node.
        if id(self) in stack:
            return self
        self.type = self.type.fill(types, stack + [id(self)])
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        assert id(self) not in stack
        changed, inner = self.type.new_fill(types, stack + [id(self)])
        return (changed, ReturnType(inner))
# Debug counter; only referenced by the commented-out assertion in
# Type.is_subtype_of above.
num_expressions: int = 0

@dataclass
class EnumType(Type):
    # A nominal enum type: subtyping compares by object identity
    # (see Type.is_subtype_of), so two structurally equal enums are distinct.
    name: str
    members: Dict[str, List[Type]]  # member name -> that member's payload types
    generics: List[Type]
    def __repr__(self) -> str: return self.represent()
    def represent(self) -> str: return self.name+('['+', '.join([generic.represent() for generic in self.generics])+']' if self.generics else '')
    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack guards against recursive enums.
        if id(self) in stack: return self
        self.members = {member_name: [element.fill(types, stack+[id(self)]) for element in self.members[member_name]] for member_name in self.members}
        self.generics = [type.fill(types, stack+[id(self)]) for type in self.generics]
        return self
    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        assert id(self) not in stack
        is_new = False
        new_members: Dict[str, List[Type]] = {}
        for member_name in self.members:
            member = self.members[member_name]
            is_new_member, new_members[member_name] = self.new_fill_list(member, types, stack)
            is_new = is_new or is_new_member
        # NOTE(review): unlike fill(), generics are NOT filled here — confirm intended.
        # Returns self unchanged when nothing was substituted, preserving identity.
        return (is_new, EnumType(self.name, new_members, self.generics) if is_new else self)
    def __hash__(self) -> int:
        # Defining __eq__ on Type disables inherited hashing; restore it here.
        return hash(('type', 'enum', self.name, tuple([(member, tuple(self.members[member])) for member in self.members]), tuple(self.generics)))
@dataclass
class StructType(Type):
    """A nominal struct type; subtyping compares by object identity
    (see Type.is_subtype_of)."""
    name: str
    members: Dict[str, Type]
    generics: List[Type]

    def represent(self) -> str:
        assert not self.generics
        return self.name

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # Mutates members in place so recursive references keep their identity.
        if id(self) in stack:
            return self
        child_stack = stack + [id(self)]
        for member_name in self.members:
            self.members[member_name] = self.members[member_name].fill(types, child_stack)
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        assert id(self) not in stack
        changed, filled_members = self.new_fill_dict(self.members, types, stack)
        # Return self unchanged when nothing was substituted, preserving identity.
        return (changed, StructType(self.name, filled_members, self.generics) if changed else self)
@dataclass
class VariableType(Type):
    """A named placeholder standing in for a not-yet-known type."""
    name: str

    def represent(self) -> str:
        return self.name + '?'

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # Substitute when a binding for this name exists; otherwise stay as-is.
        if self.name in types:
            return types[self.name]
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        bound = self.name in types
        return (bound, types[self.name] if bound else self)
@dataclass
class DictionaryType(Type):
    """A dictionary type; keys check contravariantly and values covariantly
    (see Type.is_subtype_of)."""
    key_type: Type
    value_type: Type

    def represent(self) -> str:
        return f"dict[{self.key_type.represent()}, {self.value_type.represent()}]"

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        # In-place fill; the id() stack breaks cycles through this node.
        if id(self) in stack:
            return self
        child_stack = stack + [id(self)]
        self.key_type = self.key_type.fill(types, child_stack)
        self.value_type = self.value_type.fill(types, child_stack)
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        keys_changed, new_key_type = self.key_type.new_fill(types, stack + [id(self)])
        values_changed, new_value_type = self.value_type.new_fill(types, stack + [id(self)])
        return (keys_changed or values_changed, DictionaryType(new_key_type, new_value_type))

    def is_indexable(self) -> bool:
        return True
@dataclass
class GenericType(Type):
    """A type parameterised over variables; substitute() instantiates it."""
    variables: List[VariableType]
    type: Type

    def represent(self) -> str: assert False

    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
        if id(self) in stack:
            return self
        self.type = self.type.fill(types, stack + [id(self)])
        return self

    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
        assert False

    def substitute(self, types: List[Type]) -> Type:
        """Replace each declared variable with the corresponding concrete type."""
        assert len(types) == len(self.variables), f"{self.type.represent()} expected {len(self.variables)} type parameters, but got {len(types)}!"
        bindings = {variable.name: argument for (variable, argument) in zip(self.variables, types)}
        return self.type.new_fill(bindings, [])[1]

180
ppp_types0.py Normal file
View File

@ -0,0 +1,180 @@
from dataclasses import dataclass
from types import EllipsisType
from typing import Dict, Optional, Union as Union_, Tuple as Tuple_, List as List_
@dataclass
class StructContents:
    # Payload of a struct type: generic parameters plus named fields.
    generics: 'List_[Type]'
    members: 'Dict[str, Type]'
@dataclass
class TupleContents:
    # Payload of a tuple type: the element types in order.
    elements: 'List_[Type]'
@dataclass
class EnumContents:
    # Payload of an enum type; each member carries either tuple-style
    # payloads (a list of types) or struct-style payloads (a dict of
    # field name -> type) — see Type.fill below.
    generics: 'List_[Type]'
    members: 'Dict[str, Union_[Dict[str, Type], List_[Type]]]'
@dataclass
class UnknownContents:
    # Placeholder for a not-yet-resolved type; resolved by name in Type.fill.
    pass
@dataclass
class FunctionContents:
    # Payload of a function type: argument types and the return type.
    arguments: 'List_[Type]'
    return_type: 'Type'
@dataclass
class ListContents:
    # Payload of a list type; None acts as a wildcard in is_subtype_of.
    type: 'Optional[Type]'
@dataclass
class UnionContents:
    # Payload of a union type: the alternative types.
    types: 'List_[Type]'
@dataclass
class Type:
    # Older tagged-union representation of a ppp type: a name plus a
    # contents payload describing its structure.  Superseded by the class
    # hierarchy in ppp_types.py.
    name: str
    contents: Union_[EnumContents, StructContents, TupleContents, FunctionContents, ListContents, UnknownContents, UnionContents]
    def specify(self, *types: 'Type') -> 'Type':
        assert False, ("Unimplemented")
    def is_subtype_of(self, other: 'Type') -> bool: # TODO: Maybe return any generics that match
        # 'object' is the top type: everything is a subtype of it.
        if other.name == 'object': return True
        match self.contents, other.contents:
            # NOTE(review): these single-argument positional patterns bind the
            # FIRST dataclass field, which for EnumContents/StructContents is
            # `generics`, not `members`.  Harmless here because only the names
            # are compared, but worth confirming the intent.
            case (EnumContents(self_members), EnumContents(other_members)) | (StructContents(self_members), StructContents(other_members)):
                return self.name == other.name
            case TupleContents(self_elements), TupleContents(other_elements):
                if self.name != other.name: return False
                if len(self_elements) != len(other_elements): return False
                # Tuples are covariant element-wise.
                for (self_element, other_element) in zip(self_elements, other_elements):
                    if not self_element.is_subtype_of(other_element): return False
                return True
            case FunctionContents(self_arguments, self_return_type), FunctionContents(other_arguments, other_return_type):
                # Arguments contravariant, return type covariant.
                # NOTE(review): no length check — zip silently truncates when
                # argument counts differ; confirm callers guarantee equal arity.
                for (self_argument, other_argument) in zip(self_arguments, other_arguments):
                    if not other_argument.is_subtype_of(self_argument): return False
                return self_return_type.is_subtype_of(other_return_type)
            case ListContents(self_type), ListContents(other_type):
                # None is a wildcard element type on either side.
                if self_type is None: return True
                if other_type is None: return True
                return self_type.is_subtype_of(other_type)
            case UnionContents(self_types), UnionContents(other_types):
                # Every alternative of self must fit inside other.
                for type_ in self_types:
                    if not type_.is_subtype_of(other): return False
                return True
            case a, b if type(a) == type(b):
                assert False, ("Unimplemented", self, other)
            case _, UnionContents(types):
                # A non-union is a subtype of a union when it fits any alternative.
                for type_ in types:
                    if self.is_subtype_of(type_): return True
                return False
            case _, _:
                return False
        # Unreachable: the `case _, _` above matches everything.
        assert False, ("Unimplemented")
    def fill(self, types: 'Dict[str, Type]') -> 'Type':
        # Return a copy of this type with named unknowns substituted from `types`.
        match self.contents:
            case TupleContents(elements):
                return Type(self.name, TupleContents([type.fill(types) for type in elements]))
            case EnumContents(generics, members):
                assert not generics # TODO
                new_members: Dict[str, Union_[Dict[str, Type], List_[Type]]] = {}
                for name in members:
                    member = members[name]
                    # Members are either tuple-style (list) or struct-style (dict).
                    if isinstance(member, list):
                        new_members[name] = [type.fill(types) for type in member]
                    elif isinstance(member, dict):
                        new_members[name] = {field: member[field].fill(types) for field in member}
                    else:
                        assert False, "Unreachable"
                return Type(self.name, EnumContents(generics, new_members))
            case StructContents(generics, members):
                assert not generics # TODO
                return Type(self.name, StructContents(generics, {field: members[field].fill(types) for field in members}))
            case ListContents(type):
                return Type(self.name, ListContents(type.fill(types) if type else None))
            case UnknownContents():
                # An unknown resolves by its own name, or stays unresolved.
                return types[self.name] if self.name in types else self
            case UnionContents(types_):
                return Type(self.name, UnionContents([type.fill(types) for type in types_]))
            case FunctionContents(arguments, return_type):
                return Type(self.name, FunctionContents([argument.fill(types) for argument in arguments], return_type.fill(types)))
            case _:
                assert False, ("Unimplemented", self.contents)
        assert False, "Unreachable"
    def represent(self) -> str:
        # Human-readable rendering; asserts on payload kinds with no rendering yet.
        match self.contents:
            case EnumContents(generics, _) | StructContents(generics, _): return self.name + ("<"+', '.join([generic.represent() for generic in generics])+">" if generics else '')
            case TupleContents(elements): return (self.name if self.name != "tuple" else '') + ('('+', '.join([generic.represent() for generic in elements])+')' if elements else '')
            case ListContents(type): return (type.represent() if type else '')+'[]'
            case UnknownContents(): return self.name+"?"
            case UnionContents(types): return '('+'|'.join([type.represent() for type in types])+')'
            case FunctionContents(arguments, return_type): return "("+', '.join([type.represent() for type in arguments])+") -> "+return_type.represent()
        assert False, ("Unimplemented")
    def __eq__(self, other) -> bool:
        # Equality is mutual subtyping.
        return isinstance(other, Type) and self.is_subtype_of(other) and other.is_subtype_of(self)
    def __repr__(self) -> str:
        # Fall back to a diagnostic string for payloads represent() rejects.
        try:
            return self.represent()
        except AssertionError:
            return f"Unimplemented {self.contents.__class__.__name__}"
def primitive(name: str) -> Type:
    """A structureless named type, modelled as an empty tuple payload."""
    return Type(name, TupleContents([]))

Int = primitive("int")
Str = primitive("str")
Bool = primitive("bool")
Void = primitive("void")
TypeType = primitive("type")

def Tuple(*types: Type) -> Type:
    """Build a tuple type from the given element types."""
    return Type("tuple", TupleContents(list(types)))

def List(type: Optional[Type]=None) -> Type:
    """Build a list type; omit the element type to leave it unknown."""
    return Type("list", ListContents(type))

def Function(*arguments_and_return_type: Type) -> Type:
    """Build a function type from argument types followed by the return type."""
    assert arguments_and_return_type, "Must have a return value"
    *argument_types, return_type = arguments_and_return_type
    return Type("function", FunctionContents(list(argument_types), return_type))

Unit = Tuple()

def Union(*types: Type) -> Type:
    """Build a union type over the given alternatives."""
    return Type("union", UnionContents(list(types)))

def Return(type: Type) -> Type:
    """Wrap a type as the payload of a return statement."""
    return Type('return', StructContents([type], {'value': type}))

Object = primitive('object')

# TODO: struct enum members
def EnumMember(enum_type: Type, *types: Type) -> Type:
    """A tuple-style enum member, typed as a constructor function into its enum."""
    return Type('enum_tuple_member', FunctionContents(list(types), enum_type))
# def issubclassof(type1: Type, type2: Type) -> bool:
# if type1.name != 'union' and type2.name == 'union':
# return type1 in type2.generics
# if type1.name != type2.name: return False
# if not type2.generics: return True
# if len(type1.generics) != len(type2.generics): return False
# if type1.name == '->':
# for (type_a, type_b) in zip(type1.generics[:-1], type2.generics[:-1]):
# if not issubclassof(type_b, type_a): return False
# return issubclassof(type1.generics[-1], type2.generics[-1])
# if type1.name == 'union': assert False, ("Unimplemented", type1, type2)
# if type1.name == 'tuple': return all([issubclassof(type_a, type_b) for (type_a, type_b) in zip(type1.generics, type2.generics)])
# assert False, ("Unimplemented", type1, type2)
# Void = Type('void', TupleContents([]))
# AbstractFunction = Type('->', [])
# def Function(*arguments_and_return_type: Type) -> Type:
# assert arguments_and_return_type, "Must have a return value"
# return AbstractFunction.specify(*arguments_and_return_type)
# Int = Type('int', [], False)
# String = Type('str', [], False)
# Bool = Type('bool', [], False)
# def List(type: Type) -> Type: return Type('list', [type])
# Unit = Type('unit', [], False)
# def Tuple(*types: Type) -> Type: return Type('tuple', list(types)) if types else Unit

2
test.ppp Normal file
View File

@ -0,0 +1,2 @@
x: int = "Hello, World!\n";
print(x);

27
type-inference.md Normal file
View File

@ -0,0 +1,27 @@
# Type Inference + Generics
The interpreter needs to evaluate expressions with an expectation of what type they will result in. For example, if it encounters an empty list expecting a list of ints, then it will return an empty list but the type information will convey that it is a list of ints. Currently the way it works is that it just returns a list where the internal type of the list is an empty variable type, which has been hard-coded to be a subclass of any list and have any list be a subclass of that type. Evaluating with knowledge of the expected type will also allow for enum members to be returned or referenced with a '.' + the enum member name, rather than having to name the enum type itself every single time.
## Generics + Generic functions
Maybe I can also remove the 'object' type. I could have any function that can take in any object just use a generic type parameter. E.g., the `len` function can be of type `A[] -> int`, where `A` is just not used in the return_type. Maybe for generic functions I will need to have the definition indicate it. E.g.,
```
func len<A> (list: A[]) -> int { ... }
```
You could probably still call the function without needing to specify `A`. E.g.,
```
ints: int[] = [0, 1, 2, 3, 4, 5]
num_ints = len(ints);
```
You won't need to, but still can do this:
```
num_ints = len<int>(ints);
```
It would probably be quite hard to parse, however. How would the parser know the difference between that first `<` and a less-than comparison between some variable `len` and some variable `int`? It would need to at some point start parsing for a type, but when will it know that? `len<int>(ints)` can be justifiably parsed as `len` less than `int` greater than `ints`, where `ints` has been wrapped in parentheses and you are comparing a boolean to a list. But of course, the parser has no knowledge of what types expressions are. A similar situation would occur if I did `len[int](ints)` instead. It could be parsed as array `len` element number `int`, called with the argument `ints`.
Should I even allow the user to specify the type for a generic function? What would they need that for? Maybe they need to return an array of a higher type than a type inferer would infer. For example, they need a function to return an `object[]`, and it just so happens that they know that they start off with an `int[]`, but will add objects later, or instead of `int` and `object`, any two types where the former is a subtype of the latter.
Um, actually. I think what I'll do is have the language infer the types for any generic functions, and then convert the return value to any types if you indicate it.