Compare commits


7 Commits

SHA1        Message                                                                            Date
9e68c529bf  Clean up ppp_stdlib.py                                                             2024-10-01 14:37:23 +10:00
04fff7514e  Make array literals explicitly state the element type                              2024-10-01 14:30:05 +10:00
e48d50f1e6  Make structure instantiation StructType.{args...} instead of StructType{args...}  2024-10-01 14:30:05 +10:00
f3ed26f131  Make return a regular statement instead of an expression                           2024-10-01 14:29:41 +10:00
18b22cd5d1  Fix token location reporting                                                       2024-10-01 11:28:10 +10:00
b02ca87760  Remove union types                                                                 2024-08-13 12:45:42 +10:00
            (I will need to implement polymorphism later to allow functions like `len` to
            work: `len` currently relies on declaring its argument as either a list or a
            string, since strings are not a subtype of lists.)
dd3b933e03  Make imports expect a string, not an expression                                    2024-08-12 00:16:13 +10:00
8 changed files with 160 additions and 205 deletions
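Taken together, these commits change the language's surface syntax: array literals and loop comprehensions now name their element type up front, struct instantiation moves behind a dot, `return` becomes a plain statement, and `import` takes a string literal. A minimal sketch of the new forms, driven through the repository's own lexer and parser; the .ppp source inside the string (and the `Point` struct it mentions) is hypothetical:

    from ppp_lexer import Lexer
    from ppp_parser import parse_statement
    from ppp_tokens import EofToken

    source = '''
    import "prelude.ppp";
    xs = [:int, 1, 2, 3];
    squares = [:int, x ** 2 for x in xs];
    p = Point.{x = 1, y = 2};
    return str_len("hello");
    '''

    # Lexer now also takes a filename, used for error locations.
    lexer = Lexer(source, "example.ppp")
    while not lexer.check_tokenkind(EofToken):
        print(parse_statement(lexer))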

ppp_ast.py

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple

 ### Types ###

@@ -15,13 +15,6 @@ class TupleTypeExpr(TypeExpression):
     def represent(self) -> str:
         assert False, ("Unimplemented")

-@dataclass
-class UnionTypeExpr(TypeExpression):
-    types: List[TypeExpression]
-
-    def represent(self) -> str:
-        assert False, ("Unimplemented")
-
 @dataclass
 class ListTypeExpr(TypeExpression):
     type: TypeExpression
@@ -144,6 +137,7 @@ class ArrayAccess(Expression):
 @dataclass
 class Array(Expression):
+    element_type: TypeExpression
     array: List[Expression]

     def represent(self) -> str:
@@ -200,6 +194,7 @@ class StructInstantiation(Expression):
 @dataclass
 class LoopComprehension(Expression):
+    element_type: TypeExpression
     body: Expression
     variable: str # TODO: Pattern matching
     array: Expression
@@ -209,16 +204,6 @@ class LoopComprehension(Expression):
     def precedence(self) -> int: return 13

-@dataclass
-class Return(Expression):
-    expression: Expression
-
-    def represent(self) -> str:
-        # TODO: This will have to be improved
-        return "return "+self.wrap(self.expression)
-
-    def precedence(self) -> int: return 0
-
 @dataclass
 class Lambda(Expression):
     parameters: List[TypeDeclaration]
@@ -491,7 +476,6 @@ class DoWhileStatement(Statement):
     body: Statement
     condition: Optional[Expression]

-# TODO: Maybe do something similar to return with these two?
 @dataclass
 class BreakStatement(Statement):
     pass
@@ -500,6 +484,10 @@ class BreakStatement(Statement):
 class ContinueStatement(Statement):
     pass

+@dataclass
+class ReturnStatement(Statement):
+    expression: Expression
+
 @dataclass
 class MatchStatement(Statement):
     value: Expression
@@ -518,7 +506,7 @@ class ForLoop(Statement):
 @dataclass
 class Import(Statement):
-    file: Expression
+    file: str

 @dataclass
 class TypeDefinition(Statement):
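For the AST itself the reshaping is mechanical: `Array` and `LoopComprehension` gain a leading `element_type` field, and `Return` moves from the expression hierarchy to a `ReturnStatement`. A small sketch of constructing the new nodes (using `TupleTypeExpr([])` only because it is a conveniently self-contained `TypeExpression`):

    from ppp_ast import Array, ReturnStatement, TupleTypeExpr, Variable

    unit = TupleTypeExpr([])              # stands in for a real element type
    empty = Array(unit, [])               # the element type survives even for []
    ret = ReturnStatement(Variable("x"))  # a Statement now, not an Expression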

ppp_interpreter.py

@@ -1,13 +1,13 @@
 from dataclasses import dataclass
-from typing import Dict, List as List_, Optional, Tuple, Union
+from typing import Dict, List as List_, Optional, Tuple

 from ppp_ast import *
 from ppp_lexer import Lexer
-from ppp_object import Bool, EnumValue, Function, Int, Object, Str, Struct, Tuple as TupleObject, List as ListObject, Return as ReturnObject, TypeObject, Void
+from ppp_object import Bool, EnumValue, Function, Int, Object, Str, Struct, Tuple as TupleObject, List as ListObject, TypeObject, Void
 from ppp_parser import is_valid_target, parse_statement
 from ppp_tokens import EofToken
 from ppp_stdlib import variables
-from ppp_types import EnumType, FunctionType, GenericType, Int as IntType, ListType, ReturnType, Str as StrType, StructType, TupleType, Type, TypeType, UnionType, VariableType, Void as VoidType
+from ppp_types import EnumType, FunctionType, GenericType, Int as IntType, ListType, Str as StrType, StructType, TupleType, Type, TypeType, VariableType, Void as VoidType

 @dataclass
 class Declared:
@@ -31,7 +31,7 @@ class Constant:
     def from_obj(obj: Object) -> 'Declared':
         return Declared(obj.get_type(), obj)

-VariableState = Union[Declared, Undeclared, Constant]
+VariableState = Declared | Undeclared | Constant

 Module = Dict[str, VariableState]
@@ -218,9 +218,6 @@ def calculate_expression(expression: Expression, program: ProgramState) -> Object:
                         case Int(num): return Str(left_value.str % num)
                         case _: assert False, ("Unimplemented", right_value)
                 assert False, ("Unimplemented", lhs, rhs)
-        case Return(expression):
-            value = calculate_expression(expression, program)
-            return ReturnObject(ReturnType(value.get_type()), value)
         case StructInstantiation(struct_, arguments_):
             struct = calculate_expression(struct_, program)
             assert isinstance(struct, TypeObject)
@@ -291,38 +288,26 @@ def calculate_expression(expression: Expression, program: ProgramState) -> Object:
             assert False, ("Unimplemented", expression_)
         case UnaryMinus (expression_):
             assert False, ("Unimplemented", expression_)
-        case Array(array_):
-            if len(array_) == 0:
-                return ListObject(ListType(VariableType("")), [])
-            elements_type: Optional[Type] = None
+        case Array(element_type_, array_):
+            element_type = calculate_type_expression(element_type_, program)
             array_elements_: List_[Object] = []
             for element_ in array_:
                 element = calculate_expression(element_, program)
-                if elements_type:
-                    assert element.get_type().is_subtype_of(elements_type), (element, elements_type)
-                else:
-                    elements_type = element.get_type()
+                assert element.get_type().is_subtype_of(element_type), (element, element_type)
                 array_elements_.append(element)
-            assert elements_type
-            return ListObject(ListType(elements_type), array_elements_)
-        case LoopComprehension(body_, variable, array_):
+            return ListObject(ListType(element_type), array_elements_)
+        case LoopComprehension(element_type_, body_, variable, array_):
+            element_type = calculate_type_expression(element_type_, program)
            array = calculate_expression(array_, program)
            assert array.get_type().is_indexable()
            if isinstance(array, ListObject):
                elements: List_[Object] = []
-                elements_type = None
                for element in array.list:
                    program.push_context({variable: Declared.from_obj(element)})
                    elements.append(calculate_expression(body_, program))
                    program.pop_context()
-                    if elements_type:
-                        assert elements[-1].get_type().is_subtype_of(elements_type)
-                    else:
-                        elements_type = elements[-1].get_type()
-                if not elements: return ListObject(ListType(VariableType("")), [])
-                assert elements_type
-                return ListObject(ListType(elements_type), elements)
+                    assert elements[-1].get_type().is_subtype_of(element_type)
+                return ListObject(ListType(element_type), elements)
            else:
                assert False, ("Unimplemented", array)
        case _:
@@ -340,8 +325,6 @@ def calculate_type_expression(expression: TypeExpression, program: ProgramState,
         return ListType(calculate_type_expression(type_, program, must_resolve))
     case TupleTypeExpr(types_):
         return TupleType([calculate_type_expression(type, program, must_resolve) for type in types_])
-    case UnionTypeExpr(types_):
-        return UnionType([calculate_type_expression(type, program, must_resolve) for type in types_])
     case FunctionTypeExpr(arguments_, return_type_):
         return FunctionType([calculate_type_expression(argument, program, must_resolve) for argument in arguments_], calculate_type_expression(return_type_, program, must_resolve))
     case TypeSpecification(type_, types_):
@@ -413,14 +396,13 @@ class BreakResult:
 class NothingResult:
     pass

-StatementsResult = Union[ReturnResult, ContinueResult, BreakResult, NothingResult]
+StatementsResult = ReturnResult | ContinueResult | BreakResult | NothingResult

 def interpret_statements(statements: List_[Statement], program: ProgramState) -> StatementsResult:
     for statement in statements:
         match statement:
             case ExpressionStatement(expression):
-                value = calculate_expression(expression, program)
-                if isinstance(value, ReturnObject): return ReturnResult(value.value)
+                calculate_expression(expression, program)
             case Assignment(lhs, rhs, type_):
                 assert is_valid_target(lhs)
                 match lhs:
@@ -554,14 +536,14 @@ def interpret_statements(statements: List_[Statement], program: ProgramState) -> StatementsResult:
                     case _: assert False, ("Unimplemented", return_value)
             case ContinueStatement(): return ContinueResult()
             case BreakStatement(): return BreakResult()
-            case Import(file_):
+            case ReturnStatement(expression=expression):
+                return ReturnResult(calculate_expression(expression, program))
+            case Import(file):
                 # TODO: Maybe an inclusion system within a preprocessor maybe
-                file = calculate_expression(file_, program)
-                assert isinstance(file, Str), "Only strings are valid file paths!"
-                module = interpret_file(file.str, program.modules) if file.str not in program.modules else program.modules[file.str]
+                module = interpret_file(file, program.modules) if file not in program.modules else program.modules[file]
                 program.contexts[0] |= module
-                if file.str not in program.modules:
-                    program.modules[file.str] = module
+                if file not in program.modules:
+                    program.modules[file] = module
             case TypeDefinition(name, expression_):
                 program.declare_and_assign_variable(name, TypeObject(calculate_type_expression(expression_, program)))
             case DeferStatement(statement=statement):
@@ -582,7 +564,7 @@ def interpret_file(file_path: str, modules: Dict[str, Module]) -> Module:
     assert len(program.contexts) == 2
     match return_value:
         case NothingResult(): pass
-        case ReturnObject(_): assert False, "Cannot return from outside a function!"
+        case ReturnResult(_): assert False, "Cannot return from outside a function!"
         case ContinueResult(): assert False, "Cannot continue from outside a loop!"
         case BreakResult(): assert False, "Cannot break from outside a loop!"
         case _: assert False, ("Unimplemented", return_value)

ppp_lexer.py

@@ -1,31 +1,32 @@
 from typing import Optional

-from ppp_tokens import EofToken, IdentifierToken, Keyword, KeywordToken, NumberToken, StringToken, Symbol, SymbolToken, Token, TokenContents
+from ppp_tokens import EofToken, IdentifierToken, Keyword, KeywordToken, NumberToken, StringToken, Symbol, SymbolToken, Token, TokenContents, Location

 class Lexer:
-    def __init__(self, source: str) -> None:
+    def __init__(self, source: str, filename: str) -> None:
         self._source = source
         self._location = 0
         self._line = 1
         self._col = 0
+        self._filename = filename
         self._peeked_token: Optional[Token] = None
-        self._current: str = ""
+
+    def _loc(self) -> Location:
+        return Location(self._filename, self._line, self._col)
+
+    def _token(self, loc: Location, value: str, contents: TokenContents) -> Token:
+        return Token(loc, value, contents)

     @classmethod
     def from_file(cls, path: str) -> 'Lexer':
         with open(path) as f:
-            return cls(f.read())
+            return cls(f.read(), path)

-    def _advance(self) -> str:
+    def _advance(self):
         assert self._location < len(self._source)
-        self._line, self._col = (self._line + 1, 0) if self._current == '\n' else (self._line, self._col + 1)
+        self._line, self._col = (self._line + 1, 0) if self._source[self._location] == '\n' else (self._line, self._col + 1)
         self._location += 1
-        self._current = self._source[self._location] if self._location < len(self._source) else ''
-        return self._current
-
-    # def _peek(self) -> str:
-    #     assert self._location < len(self._source)-1

     def next_token(self) -> Token:
         if self._peeked_token is not None:
@@ -34,71 +35,84 @@ class Lexer:
         while self._location < len(self._source) and self._source[self._location] in ' \t\n': self._advance()

-        if self._location >= len(self._source): return Token(self._line, self._col, '\0', EofToken())
+        if self._location >= len(self._source): return self._token(self._loc(), '\0', EofToken())

         match self._source[self._location]:
             case c if c.isdigit():
                 start_location = self._location
-                while self._location < len(self._source) and self._source[self._location].isdigit(): self._location += 1
+                loc = self._loc()
+                while self._location < len(self._source) and self._source[self._location].isdigit(): self._advance()
                 number = int(self._source[start_location:self._location])
-                return Token(self._line, self._col, self._source[start_location:self._location], NumberToken(number))
+                return self._token(loc, self._source[start_location:self._location], NumberToken(number))
             case c if c.isalpha() or c == "_":
                 start_location = self._location
-                while self._location < len(self._source) and (self._source[self._location].isalpha() or self._source[self._location] in '_'): self._location += 1
+                loc = self._loc()
+                while self._location < len(self._source) and (self._source[self._location].isalpha() or self._source[self._location] in '_'): self._advance()
                 word = self._source[start_location:self._location]
                 try:
                     keyword = Keyword(word)
-                    return Token(self._line, self._col, word, KeywordToken(keyword))
+                    return self._token(loc, word, KeywordToken(keyword))
                 except ValueError:
                     try:
                         symbol = Symbol(word)
-                        return Token(self._line, self._col, word, SymbolToken(symbol))
+                        return self._token(loc, word, SymbolToken(symbol))
                     except ValueError:
-                        return Token(self._line, self._col, word, IdentifierToken(word))
+                        return self._token(loc, word, IdentifierToken(word))
             case '"':
-                # TODO: Escaping
-                self._location += 1
+                # TODO: Proper escaping
+                self._advance()
                 start_location = self._location
+                loc = self._loc()
                 escaping = False
                 while self._location < len(self._source) and (self._source[self._location] != '"' or escaping):
                     escaping = self._source[self._location] == '\\' if not escaping else False
-                    self._location += 1
+                    self._advance()
                 string = self._source[start_location:self._location].encode('utf-8').decode('unicode_escape')
-                self._location += 1
-                return Token(self._line, self._col, self._source[start_location-1:self._location], StringToken(string))
+                self._advance()
+                return self._token(loc, self._source[start_location-1:self._location], StringToken(string))
             # TODO: Make a proper Trie for this.
             case '|' if self._location < len(self._source)-1 and self._source[self._location+1] == '|':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dpipe))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dpipe))
             case '&' if self._location < len(self._source)-1 and self._source[self._location+1] == '&':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dampersand))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dampersand))
             case '*' if self._location < len(self._source)-1 and self._source[self._location+1] == '*':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dasterisk))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dasterisk))
             case '-' if self._location < len(self._source)-1 and self._source[self._location+1] == '>':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Arrow))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Arrow))
             case '>' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.GreaterEqual))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.GreaterEqual))
             case '<' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.LesserEqual))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.LesserEqual))
             case '=' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dequal))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dequal))
             case '=' if self._location < len(self._source)-1 and self._source[self._location+1] == '>':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.EqualArrow))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.EqualArrow))
             case '!' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                self._location += 2
-                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.NotEqual))
+                loc = self._loc()
+                self._advance(); self._advance()
+                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.NotEqual))
             case c if c in Symbol._value2member_map_:
-                self._location += 1
-                return Token(self._line, self._col, self._source[self._location-1], SymbolToken(Symbol(c)))
+                loc = self._loc()
+                self._advance()
+                return self._token(loc, self._source[self._location-1], SymbolToken(Symbol(c)))
             case _:
-                assert False, ("Unimplemented", c, self._location)
+                raise SyntaxError(f"{self._loc()}: Unknown character: '{c}'")
         assert False, "Unreachable"

     def peek_token(self) -> Token:
@@ -108,12 +122,12 @@ class Lexer:
     def assert_tokenkind(self, kind: type) -> Token:
         token = self.next_token()
-        assert isinstance(token.contents, kind), (f"Expected {kind} but got {token.contents}!", self.next_token(), self.next_token(), self.next_token())
+        if not isinstance(token.contents, kind): raise SyntaxError(f"{token.loc}: Expected {kind} but got {token.contents}!")
         return token

     def assert_token(self, expected: TokenContents) -> Token:
         token = self.next_token()
-        assert token.contents == expected, (f"Expected {expected} but got {token.contents}!", self.next_token(), self.next_token())
+        if token.contents != expected: raise SyntaxError(f"{token.loc}: Expected {expected} but got {token.contents}!")
         return token

     def check_token(self, expected: TokenContents) -> bool:
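With a filename and `Location` threaded through every token, malformed input now raises `SyntaxError` with a file:line:col prefix instead of tripping an assert. A usage sketch (the file name and input are illustrative):

    from ppp_lexer import Lexer
    from ppp_tokens import Keyword, KeywordToken

    lexer = Lexer('return 42;', 'demo.ppp')
    try:
        lexer.assert_token(KeywordToken(Keyword.Import))
    except SyntaxError as error:
        print(error)   # something like: demo.ppp:1:1: Expected ... but got ...!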

ppp_object.py

@@ -1,11 +1,9 @@
-# This file exists because I wanted to keep ppp_stdlib.py and ppp_interpreter.py seperate but they both rely on this one class.
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Callable, Dict, List as List_, Tuple as Tuple_, Union as Union_
+from typing import Callable, Dict, List as List_, Tuple as Tuple_

 from ppp_ast import Statement
-from ppp_types import ArrayType, EnumType, FunctionType, ListType, ReturnType, StructType, TupleType, Type, Int as IntType, Str as StrType, Bool as BoolType, Void as VoidType, TypeType
+from ppp_types import ArrayType, EnumType, FunctionType, ListType, StructType, TupleType, Type, Int as IntType, Str as StrType, Bool as BoolType, Void as VoidType, TypeType

 class Object(ABC):
     @abstractmethod
@@ -68,13 +66,6 @@ class Function(Object):
     def get_type(self) -> Type: return self.type

-@dataclass
-class Return(Object):
-    type: ReturnType
-    value: Object
-
-    def get_type(self) -> Type: return self.type
-
 @dataclass
 class EnumValue(Object):
     type: EnumType

ppp_parser.py

@@ -23,22 +23,11 @@ def parse_type_primary(lexer: Lexer) -> TypeExpression:
     if lexer.take_token(SymbolToken(Symbol.Open)):
         if lexer.take_token(SymbolToken(Symbol.Close)): return TupleTypeExpr([])

-        def parse_union(lexer: Lexer) -> TypeExpression:
-            union_types: List[TypeExpression] = [parse_type(lexer)]
-            while lexer.take_token(SymbolToken(Symbol.Pipe)):
-                union_types.append(parse_type(lexer))
-            if len(union_types) == 1:
-                return union_types[0]
-            return UnionTypeExpr(union_types)
-
-        types: List[TypeExpression] = [parse_union(lexer)]
+        types: List[TypeExpression] = [parse_type(lexer)]
         while lexer.take_token(SymbolToken(Symbol.Comma)):
-            types.append(parse_union(lexer))
+            types.append(parse_type(lexer))
         lexer.assert_token(SymbolToken(Symbol.Close))
-        if len(types) == 1 and isinstance(types[0], UnionTypeExpr):
-            base_type = types[0]
-        else:
-            base_type = TupleTypeExpr(types)
+        base_type = TupleTypeExpr(types)
     elif lexer.take_token(SymbolToken(Symbol.OpenSquare)):
         type = parse_type(lexer)
         lexer.assert_token(SymbolToken(Symbol.CloseSquare))
@@ -119,21 +108,25 @@ def parse_primary(lexer: Lexer) -> Expression:
         else:
             base_expression = elements[0]
     elif lexer.take_token(SymbolToken(Symbol.OpenSquare)):
+        lexer.assert_token(SymbolToken(Symbol.Colon))
+        element_type = parse_type(lexer)
         if lexer.take_token(SymbolToken(Symbol.CloseSquare)):
-            base_expression = Array([])
+            base_expression = Array(element_type, [])
         else:
+            lexer.assert_token(SymbolToken(Symbol.Comma))
             expressions: List[Expression] = [parse_expression(lexer)]
             if lexer.take_token(KeywordToken(Keyword.For)):
                 variable = parse_identifier(lexer) # TODO: Pattern matching
                 lexer.assert_token(KeywordToken(Keyword.In))
                 expression = parse_expression(lexer)
                 lexer.assert_token(SymbolToken(Symbol.CloseSquare))
-                base_expression = LoopComprehension(expressions[0], variable, expression)
+                base_expression = LoopComprehension(element_type, expressions[0], variable, expression)
             else:
                 while lexer.take_token(SymbolToken(Symbol.Comma)):
                     expressions.append(parse_expression(lexer))
                 lexer.assert_token(SymbolToken(Symbol.CloseSquare))
-                base_expression = Array(expressions)
+                base_expression = Array(element_type, expressions)
     elif lexer.check_tokenkind(StringToken):
         base_expression = String(parse_string(lexer))
     elif lexer.check_tokenkind(NumberToken):
@@ -141,13 +134,34 @@ def parse_primary(lexer: Lexer) -> Expression:
     else:
         base_expression = Variable(parse_identifier(lexer))

-    while (token := lexer.take_tokens(SymbolToken(Symbol.Open), SymbolToken(Symbol.OpenSquare), SymbolToken(Symbol.Dot), SymbolToken(Symbol.OpenCurly))):
+    while (token := lexer.take_tokens(SymbolToken(Symbol.Open), SymbolToken(Symbol.OpenSquare), SymbolToken(Symbol.Dot))):
         match token.contents:
             case SymbolToken(symbol):
                 match symbol:
                     case Symbol.Dot:
-                        field = parse_identifier(lexer)
-                        base_expression = FieldAccess(base_expression, field)
+                        next_token = lexer.next_token()
+                        match next_token.contents:
+                            case IdentifierToken(identifier=field):
+                                base_expression = FieldAccess(base_expression, field)
+                            case SymbolToken(symbol=symbol):
+                                match symbol:
+                                    case Symbol.OpenCurly:
+                                        if lexer.take_token(SymbolToken(Symbol.CloseCurly)):
+                                            base_expression = StructInstantiation(base_expression, [])
+                                        else:
+                                            def parse_argument() -> Tuple[str, Expression]:
+                                                parameter = parse_identifier(lexer)
+                                                lexer.assert_token(SymbolToken(Symbol.Equal))
+                                                return (parameter, parse_expression(lexer))
+                                            struct_arguments: List[Tuple[str, Expression]] = [parse_argument()]
+                                            while lexer.take_token(SymbolToken(Symbol.Comma)): struct_arguments.append(parse_argument())
+                                            lexer.assert_token(SymbolToken(Symbol.CloseCurly))
+                                            base_expression = StructInstantiation(base_expression, struct_arguments)
+                                    case _:
+                                        raise SyntaxError(f"{next_token.loc}: Unexpected symbol: {repr(str(symbol))}")
+                            case _:
+                                raise SyntaxError(f"{next_token.loc}: Unexpected: {next_token.contents}")
                     case Symbol.Open:
                         if lexer.take_token(SymbolToken(Symbol.Close)):
                             base_expression = FunctionCall(base_expression, [])
@@ -161,19 +175,6 @@ def parse_primary(lexer: Lexer) -> Expression:
                         index = parse_expression(lexer)
                         lexer.assert_token(SymbolToken(Symbol.CloseSquare))
                         base_expression = ArrayAccess(base_expression, index)
-                    case Symbol.OpenCurly:
-                        if lexer.take_token(SymbolToken(Symbol.CloseCurly)):
-                            base_expression = StructInstantiation(base_expression, [])
-                        else:
-                            def parse_argument() -> Tuple[str, Expression]:
-                                parameter = parse_identifier(lexer)
-                                lexer.assert_token(SymbolToken(Symbol.Equal))
-                                return (parameter, parse_expression(lexer))
-                            struct_arguments: List[Tuple[str, Expression]] = [parse_argument()]
-                            while lexer.take_token(SymbolToken(Symbol.Comma)): struct_arguments.append(parse_argument())
-                            lexer.assert_token(SymbolToken(Symbol.CloseCurly))
-                            base_expression = StructInstantiation(base_expression, struct_arguments)
                     case _: assert False, ("Unimplemented", symbol)
             case _: assert False, ("Unimplemented", token)
@@ -184,7 +185,6 @@ def parse_unary(lexer: Lexer) -> Expression:
     if lexer.take_token(SymbolToken(Symbol.Exclamation)): return Not(parse_unary(lexer))
     if lexer.take_token(SymbolToken(Symbol.Plus)): return UnaryPlus(parse_unary(lexer))
     if lexer.take_token(SymbolToken(Symbol.Dash)): return UnaryMinus(parse_unary(lexer))
-    if lexer.take_token(KeywordToken(Keyword.Return)): return Return(parse_unary(lexer))
     return parse_primary(lexer)

 Precedence = Dict[Symbol, Callable[[Expression, Expression], Expression]]
@@ -220,7 +220,6 @@ def parse_ternary(lexer: Lexer) -> Expression:
     return Ternary(expression, if_true, if_false)

 def parse_expression(lexer: Lexer) -> Expression:
-    if lexer.take_token(KeywordToken(Keyword.Return)): return Return(parse_expression(lexer))
     if lexer.take_token(KeywordToken(Keyword.Lambda)):
         parameters: List[TypeDeclaration]
         if lexer.take_token(SymbolToken(Symbol.EqualArrow)):
@@ -301,6 +300,10 @@ def parse_statement(lexer: Lexer) -> Statement:
     elif lexer.take_token(KeywordToken(Keyword.Continue)):
         lexer.assert_token(SymbolToken(Symbol.Semicolon))
         return ContinueStatement()
+    elif lexer.take_token(KeywordToken(Keyword.Return)):
+        expression = parse_expression(lexer)
+        lexer.assert_token(SymbolToken(Symbol.Semicolon))
+        return ReturnStatement(expression)
     elif lexer.take_token(KeywordToken(Keyword.Do)):
         body = parse_statement(lexer)
         condition: Optional[Expression] = None
@@ -310,7 +313,6 @@ def parse_statement(lexer: Lexer) -> Statement:
         return DoWhileStatement(body, condition)
     elif lexer.take_token(KeywordToken(Keyword.Match)):
         value = parse_expression(lexer)
-        lexer.assert_token(KeywordToken(Keyword.In)) # to prevent it from parsing it as a struct instantiation
         lexer.assert_token(SymbolToken(Symbol.OpenCurly))

         cases: List[Tuple[Expression, Statement]] = []
@@ -329,7 +331,7 @@ def parse_statement(lexer: Lexer) -> Statement:
         body = parse_statement(lexer)
         return ForLoop(variable, expression, body)
     elif lexer.take_token(KeywordToken(Keyword.Import)):
-        file = parse_expression(lexer)
+        file = parse_string(lexer)
         lexer.assert_token(SymbolToken(Symbol.Semicolon))
         return Import(file)
     elif lexer.take_token(KeywordToken(Keyword.Type)):
@@ -341,8 +343,10 @@ def parse_statement(lexer: Lexer) -> Statement:
     elif lexer.take_token(KeywordToken(Keyword.Defer)):
         statement = parse_statement(lexer)
         return DeferStatement(statement)
-    elif lexer.check_tokenkind(KeywordToken) and not lexer.check_tokens(KeywordToken(Keyword.Return), KeywordToken(Keyword.Lambda)):
-        assert False, ("Unimplemented", lexer.next_token(), lexer.next_token(), lexer.next_token())
+    elif lexer.check_tokenkind(KeywordToken) and not lexer.check_token(KeywordToken(Keyword.Lambda)): # TODO: Maybe use '\' for lambda instead of a keyword
+        token = lexer.next_token()
+        assert isinstance(token.contents, KeywordToken)
+        raise SyntaxError(f"{token.loc}: Unexpected keyword: '{token.contents.keyword}'")
     elif lexer.take_token(SymbolToken(Symbol.OpenCurly)):
         statements: List[Statement] = []
         while not lexer.take_token(SymbolToken(Symbol.CloseCurly)):
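Spelling struct instantiation as `Type.{...}` is what lets two other changes fall out: the postfix loop no longer claims a bare `{`, so `match value in { ... }` can drop its disambiguating `in`, and after a `.` the parser simply branches on whether an identifier (field access) or a `{` (instantiation) follows. A sketch of the two postfix forms (`point`, `Point` and their fields are placeholders):

    from ppp_lexer import Lexer
    from ppp_parser import parse_expression

    # Field access: an identifier follows the dot.
    print(parse_expression(Lexer('point.x', 'demo.ppp')))
    # Struct instantiation: an opening brace follows the dot.
    print(parse_expression(Lexer('Point.{x = 1, y = 2}', 'demo.ppp')))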

ppp_stdlib.py

@@ -2,7 +2,7 @@ from typing import Callable, Dict, List, Tuple

 from ppp_ast import Statements
 from ppp_object import Bool, EnumValue, Int, Object, Function, Str, TypeObject, Void, List as ListObject
-from ppp_types import Bool as BoolType, FunctionType, GenericType, Int as IntType, Str as StrType, Type, TypeType, VariableType, Void as VoidType, Object as ObjectType, UnionType, ListType
+from ppp_types import Bool as BoolType, FunctionType, GenericType, Int as IntType, Str as StrType, Type, TypeType, VariableType, Void as VoidType, Object as ObjectType, ListType

 def PythonFunction(name: str, parameters: List[Tuple[str, Type]], return_type: Type, func: Callable[..., Object]) -> Object:
@@ -41,7 +41,14 @@ def len_impl(list_: Object) -> Object:
         case _: assert False, ("Unimplemented", list_)
     assert False

-Len = PythonFunction("len", [('list', UnionType([ListType(VariableType("")), StrType]))], IntType, len_impl)
+# TODO: Use polymorphism to make this work for both list<T> and str
+Len = PythonFunction("len", [('list', ListType(VariableType("")))], IntType, len_impl)
+
+def str_len_impl(str_: Object) -> Object:
+    assert isinstance(str_, Str)
+    return Int(len(str_.str))
+
+StrLen = PythonFunction("strlen", [('string', StrType)], IntType, str_len_impl)

 def str_to_int_impl(str_: Object) -> Object:
     assert isinstance(str_, Str)
@@ -100,6 +107,7 @@ variables: Dict[str, Object] = {
     'debug_print': DebugPrint,
     'read': Read,
     'len': Len,
+    'str_len': StrLen,
     'str_to_int': StrToInt,
     'none': NoneObj,
     'range': Range,
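Without union types, `len` keeps only its `list<T>` signature and strings get a separate `str_len` (registered under that name, though its internal PythonFunction name is "strlen"). The implementations can be poked at directly; `Int`, `Str` and the list wrapper are the repository's own object types:

    from ppp_object import Int, Str, List as ListObject
    from ppp_stdlib import len_impl, str_len_impl
    from ppp_types import Int as IntType, ListType

    print(len_impl(ListObject(ListType(IntType), [Int(1), Int(2)])))    # an Int of 2
    print(str_len_impl(Str("hello")))                                   # an Int of 5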

ppp_tokens.py

@@ -1,7 +1,5 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import List, Literal, Tuple, Union

 class Keyword(Enum):
     Enum = 'enum'
@@ -25,6 +23,8 @@ class Keyword(Enum):
     Type = 'type'
     Defer = 'defer'

+    def __str__(self) -> str: return self._value_
+
 class Symbol(Enum):
     Open = '('
     Close = ')'
@@ -62,6 +62,8 @@ class Symbol(Enum):
     Tilde = '~'
     Carot = '^'

+    def __str__(self) -> str: return self._value_
+
 @dataclass
 class KeywordToken:
     keyword: Keyword
@@ -88,18 +90,19 @@ class SymbolToken:
 @dataclass
 class EofToken: pass

-TokenContents = Union[
-    KeywordToken,
-    IdentifierToken,
-    NumberToken,
-    StringToken,
-    SymbolToken,
-    EofToken
-]
+TokenContents = KeywordToken | IdentifierToken | NumberToken | StringToken | SymbolToken | EofToken
+
+@dataclass
+class Location:
+    file: str
+    line: int
+    col: int
+
+    def __repr__(self) -> str:
+        return f"{self.file}:{self.line}:{self.col+1}"

 @dataclass
 class Token:
-    line: int
-    col: int
+    loc: Location
     value: str
     contents: TokenContents
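`Location` is deliberately tiny; its `__repr__` is what every new `SyntaxError` message leans on. Note the column is stored zero-based and printed one-based:

    from ppp_tokens import Location

    loc = Location(file="demo.ppp", line=3, col=0)
    print(repr(loc))   # demo.ppp:3:1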

ppp_types.py

@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple

 import sys
 sys.setrecursionlimit(1000)
@@ -40,10 +40,6 @@ class Type(ABC):
             case VariableType(self_name), VariableType(other_name):
                 return self_name == other_name
             case _, VariableType(""): return True
-            case type, UnionType(types):
-                for union_type in types:
-                    if type.is_subtype_of(union_type): return True
-                return False
             case BoolType(), BoolType(): return True
             case type, ObjectType(): return True
             case type_a, type_b if type_a.__class__ != type_b.__class__: return False
@@ -164,41 +160,10 @@ class FunctionType(Type):
         is_new_return_type, new_return_type = self.return_type.new_fill(types, stack+[id(self)])
         return (is_new_arguments or is_new_return_type, FunctionType(new_arguments, new_return_type))

-@dataclass
-class UnionType(Type):
-    types: List[Type]
-
-    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
-        if id(self) in stack: return self
-        self.types = [type.fill(types, stack+[id(self)]) for type in self.types]
-        return self
-
-    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
-        is_new, new_types = self.new_fill_list(self.types, types, stack)
-        return (is_new, UnionType(new_types))
-
-    def represent(self) -> str: return '('+'|'.join([type.represent() for type in self.types])+')'
-
 class ObjectType(Primitive):
     def represent(self) -> str: return 'object'
 Object = ObjectType()

-@dataclass
-class ReturnType(Type):
-    type: Type
-
-    def represent(self) -> str: return f"return<{self.type.represent()}>"
-
-    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
-        if id(self) in stack: return self
-        self.type = self.type.fill(types, stack+[id(self)])
-        return self
-
-    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
-        assert id(self) not in stack
-        is_new, new_type = self.type.new_fill(types, stack+[id(self)])
-        return (is_new, ReturnType(new_type))
-
 num_expressions: int = 0

 @dataclass
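After the deletion, `is_subtype_of` has no union case left; the blank `VariableType("")` wildcard, which matches any type, is what still lets the stdlib type `len`'s parameter as `list<T>`. A sketch, assuming `ListType` compares element types recursively as that stdlib usage relies on:

    from ppp_types import Int as IntType, ListType, VariableType

    print(IntType.is_subtype_of(VariableType("")))                       # True: the wildcard case
    print(ListType(IntType).is_subtype_of(ListType(VariableType(""))))   # True, assuming recursion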