Compare commits


No commits in common. "9e68c529bf02094994337f1dfb255a4de0361940" and "0dbc46bf9ccdd0aff89dae70c85dd408ed0b7f20" have entirely different histories.

8 changed files with 205 additions and 160 deletions

ppp_ast.py (View File)

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple, Union

 ### Types ###
@@ -15,6 +15,13 @@ class TupleTypeExpr(TypeExpression):
     def represent(self) -> str:
         assert False, ("Unimplemented")

+@dataclass
+class UnionTypeExpr(TypeExpression):
+    types: List[TypeExpression]
+
+    def represent(self) -> str:
+        assert False, ("Unimplemented")
+
 @dataclass
 class ListTypeExpr(TypeExpression):
     type: TypeExpression
@@ -137,7 +144,6 @@ class ArrayAccess(Expression):
 @dataclass
 class Array(Expression):
-    element_type: TypeExpression
     array: List[Expression]

     def represent(self) -> str:
@@ -194,7 +200,6 @@ class StructInstantiation(Expression):
 @dataclass
 class LoopComprehension(Expression):
-    element_type: TypeExpression
     body: Expression
     variable: str # TODO: Pattern matching
     array: Expression
@@ -204,6 +209,16 @@ class LoopComprehension(Expression):
     def precedence(self) -> int: return 13

+@dataclass
+class Return(Expression):
+    expression: Expression
+
+    def represent(self) -> str:
+        # TODO: This will have to be improved
+        return "return "+self.wrap(self.expression)
+
+    def precedence(self) -> int: return 0
+
 @dataclass
 class Lambda(Expression):
     parameters: List[TypeDeclaration]
@@ -476,6 +491,7 @@ class DoWhileStatement(Statement):
     body: Statement
     condition: Optional[Expression]

+# TODO: Maybe do something similar to return with these two?
 @dataclass
 class BreakStatement(Statement):
     pass
@@ -484,10 +500,6 @@ class BreakStatement(Statement):
 class ContinueStatement(Statement):
     pass

-@dataclass
-class ReturnStatement(Statement):
-    expression: Expression
-
 @dataclass
 class MatchStatement(Statement):
     value: Expression
@@ -506,7 +518,7 @@ class ForLoop(Statement):
 @dataclass
 class Import(Statement):
-    file: str
+    file: Expression

 @dataclass
 class TypeDefinition(Statement):
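
Net effect on the AST: `return` moves from statement to expression (Return wraps any Expression), array literals and loop comprehensions lose their explicit element_type, UnionTypeExpr joins the type expressions, and Import now holds an Expression rather than a raw path. A minimal sketch of how the new nodes compose (hypothetical tree, built only from the dataclasses above):

    # AST for `return [x for x in xs]` on the new side: the comprehension no
    # longer records an element type, so the interpreter must infer it.
    tree = Return(
        expression=LoopComprehension(
            body=Variable("x"),
            variable="x",
            array=Variable("xs"),
        )
    )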

ppp_interpreter.py (View File)

@@ -1,13 +1,13 @@
 from dataclasses import dataclass
-from typing import Dict, List as List_, Optional, Tuple
+from typing import Dict, List as List_, Optional, Tuple, Union

 from ppp_ast import *
 from ppp_lexer import Lexer
-from ppp_object import Bool, EnumValue, Function, Int, Object, Str, Struct, Tuple as TupleObject, List as ListObject, TypeObject, Void
+from ppp_object import Bool, EnumValue, Function, Int, Object, Str, Struct, Tuple as TupleObject, List as ListObject, Return as ReturnObject, TypeObject, Void
 from ppp_parser import is_valid_target, parse_statement
 from ppp_tokens import EofToken
 from ppp_stdlib import variables
-from ppp_types import EnumType, FunctionType, GenericType, Int as IntType, ListType, Str as StrType, StructType, TupleType, Type, TypeType, VariableType, Void as VoidType
+from ppp_types import EnumType, FunctionType, GenericType, Int as IntType, ListType, ReturnType, Str as StrType, StructType, TupleType, Type, TypeType, UnionType, VariableType, Void as VoidType
@@ -31,7 +31,7 @@ class Constant:
     def from_obj(obj: Object) -> 'Declared':
         return Declared(obj.get_type(), obj)

-VariableState = Declared | Undeclared | Constant
+VariableState = Union[Declared, Undeclared, Constant]

 Module = Dict[str, VariableState]
@@ -218,6 +218,9 @@ def calculate_expression(expression: Expression, program: ProgramState) -> Objec
                         case Int(num): return Str(left_value.str % num)
                         case _: assert False, ("Unimplemented", right_value)
                 assert False, ("Unimplemented", lhs, rhs)
+        case Return(expression):
+            value = calculate_expression(expression, program)
+            return ReturnObject(ReturnType(value.get_type()), value)
         case StructInstantiation(struct_, arguments_):
             struct = calculate_expression(struct_, program)
             assert isinstance(struct, TypeObject)
@@ -288,26 +291,38 @@ def calculate_expression(expression: Expression, program: ProgramState) -> Objec
             assert False, ("Unimplemented", expression_)
         case UnaryMinus (expression_):
             assert False, ("Unimplemented", expression_)
-        case Array(element_type_, array_):
-            element_type = calculate_type_expression(element_type_, program)
+        case Array(array_):
+            if len(array_) == 0:
+                return ListObject(ListType(VariableType("")), [])
+            elements_type: Optional[Type] = None
             array_elements_: List_[Object] = []
             for element_ in array_:
                 element = calculate_expression(element_, program)
-                assert element.get_type().is_subtype_of(element_type), (element, element_type)
+                if elements_type:
+                    assert element.get_type().is_subtype_of(elements_type), (element, elements_type)
+                else:
+                    elements_type = element.get_type()
                 array_elements_.append(element)
-            return ListObject(ListType(element_type), array_elements_)
-        case LoopComprehension(element_type_, body_, variable, array_):
-            element_type = calculate_type_expression(element_type_, program)
+            assert elements_type
+            return ListObject(ListType(elements_type), array_elements_)
+        case LoopComprehension(body_, variable, array_):
             array = calculate_expression(array_, program)
             assert array.get_type().is_indexable()
             if isinstance(array, ListObject):
                 elements: List_[Object] = []
+                elements_type = None
                 for element in array.list:
                     program.push_context({variable: Declared.from_obj(element)})
                     elements.append(calculate_expression(body_, program))
                     program.pop_context()
-                    assert elements[-1].get_type().is_subtype_of(element_type)
-                return ListObject(ListType(element_type), elements)
+                    if elements_type:
+                        assert elements[-1].get_type().is_subtype_of(elements_type)
+                    else:
+                        elements_type = elements[-1].get_type()
+                if not elements: return ListObject(ListType(VariableType("")), [])
+                assert elements_type
+                return ListObject(ListType(elements_type), elements)
             else:
                 assert False, ("Unimplemented", array)
         case _:
@@ -325,6 +340,8 @@ def calculate_type_expression(expression: TypeExpression, program: ProgramState,
             return ListType(calculate_type_expression(type_, program, must_resolve))
         case TupleTypeExpr(types_):
             return TupleType([calculate_type_expression(type, program, must_resolve) for type in types_])
+        case UnionTypeExpr(types_):
+            return UnionType([calculate_type_expression(type, program, must_resolve) for type in types_])
         case FunctionTypeExpr(arguments_, return_type_):
             return FunctionType([calculate_type_expression(argument, program, must_resolve) for argument in arguments_], calculate_type_expression(return_type_, program, must_resolve))
         case TypeSpecification(type_, types_):
@@ -396,13 +413,14 @@ class BreakResult:
 class NothingResult:
     pass

-StatementsResult = ReturnResult | ContinueResult | BreakResult | NothingResult
+StatementsResult = Union[ReturnResult, ContinueResult, BreakResult, NothingResult]

 def interpret_statements(statements: List_[Statement], program: ProgramState) -> StatementsResult:
     for statement in statements:
         match statement:
             case ExpressionStatement(expression):
-                calculate_expression(expression, program)
+                value = calculate_expression(expression, program)
+                if isinstance(value, ReturnObject): return ReturnResult(value.value)
             case Assignment(lhs, rhs, type_):
                 assert is_valid_target(lhs)
                 match lhs:
@@ -536,14 +554,14 @@ def interpret_statements(statements: List_[Statement], program: ProgramState) ->
                     case _: assert False, ("Unimplemented", return_value)
             case ContinueStatement(): return ContinueResult()
             case BreakStatement(): return BreakResult()
-            case ReturnStatement(expression=expression):
-                return ReturnResult(calculate_expression(expression, program))
-            case Import(file):
+            case Import(file_):
                 # TODO: Maybe an inclusion system within a preprocessor maybe
-                module = interpret_file(file, program.modules) if file not in program.modules else program.modules[file]
+                file = calculate_expression(file_, program)
+                assert isinstance(file, Str), "Only strings are valid file paths!"
+                module = interpret_file(file.str, program.modules) if file.str not in program.modules else program.modules[file.str]
                 program.contexts[0] |= module
-                if file not in program.modules:
-                    program.modules[file] = module
+                if file.str not in program.modules:
+                    program.modules[file.str] = module
             case TypeDefinition(name, expression_):
                 program.declare_and_assign_variable(name, TypeObject(calculate_type_expression(expression_, program)))
             case DeferStatement(statement=statement):
@@ -564,7 +582,7 @@ def interpret_file(file_path: str, modules: Dict[str, Module]) -> Module:
     assert len(program.contexts) == 2
     match return_value:
         case NothingResult(): pass
-        case ReturnResult(_): assert False, "Cannot return from outside a function!"
+        case ReturnObject(_): assert False, "Cannot return from outside a function!"
         case ContinueResult(): assert False, "Cannot continue from outside a loop!"
         case BreakResult(): assert False, "Cannot break from outside a loop!"
         case _: assert False, ("Unimplemented", return_value)
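
Both literal-evaluation arms now share one inference rule: the first element fixes the list's type, every later element must be a subtype of it, and empty literals fall back to the placeholder VariableType(""). A standalone sketch of that rule (illustrative helper, not a function in this diff):

    from typing import List, Optional

    def infer_elements_type(elements: List['Object']) -> Optional['Type']:
        # Returns None for an empty literal (the caller substitutes
        # VariableType("")); otherwise the first element's type wins and
        # the remaining elements are checked against it.
        elements_type: Optional['Type'] = None
        for element in elements:
            if elements_type:
                assert element.get_type().is_subtype_of(elements_type)
            else:
                elements_type = element.get_type()
        return elements_type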

ppp_lexer.py (View File)

@@ -1,32 +1,31 @@
 from typing import Optional

-from ppp_tokens import EofToken, IdentifierToken, Keyword, KeywordToken, NumberToken, StringToken, Symbol, SymbolToken, Token, TokenContents, Location
+from ppp_tokens import EofToken, IdentifierToken, Keyword, KeywordToken, NumberToken, StringToken, Symbol, SymbolToken, Token, TokenContents

 class Lexer:
-    def __init__(self, source: str, filename: str) -> None:
+    def __init__(self, source: str) -> None:
         self._source = source
         self._location = 0
         self._line = 1
         self._col = 0
-        self._filename = filename
         self._peeked_token: Optional[Token] = None
-
-    def _loc(self) -> Location:
-        return Location(self._filename, self._line, self._col)
-
-    def _token(self, loc: Location, value: str, contents: TokenContents) -> Token:
-        return Token(loc, value, contents)
+        self._current: str = ""

     @classmethod
     def from_file(cls, path: str) -> 'Lexer':
         with open(path) as f:
-            return cls(f.read(), path)
+            return cls(f.read())

-    def _advance(self):
+    def _advance(self) -> str:
         assert self._location < len(self._source)
-        self._line, self._col = (self._line + 1, 0) if self._source[self._location] == '\n' else (self._line, self._col + 1)
+        self._line, self._col = (self._line + 1, 0) if self._current == '\n' else (self._line, self._col + 1)
         self._location += 1
+        self._current = self._source[self._location] if self._location < len(self._source) else ''
+        return self._current
+
+    # def _peek(self) -> str:
+    #     assert self._location < len(self._source)-1

     def next_token(self) -> Token:
         if self._peeked_token is not None:
@@ -35,84 +34,71 @@ class Lexer:
         while self._location < len(self._source) and self._source[self._location] in ' \t\n': self._advance()
-        if self._location >= len(self._source): return self._token(self._loc(), '\0', EofToken())
+        if self._location >= len(self._source): return Token(self._line, self._col, '\0', EofToken())
         match self._source[self._location]:
             case c if c.isdigit():
                 start_location = self._location
-                loc = self._loc()
-                while self._location < len(self._source) and self._source[self._location].isdigit(): self._advance()
+                while self._location < len(self._source) and self._source[self._location].isdigit(): self._location += 1
                 number = int(self._source[start_location:self._location])
-                return self._token(loc, self._source[start_location:self._location], NumberToken(number))
+                return Token(self._line, self._col, self._source[start_location:self._location], NumberToken(number))
             case c if c.isalpha() or c == "_":
                 start_location = self._location
-                loc = self._loc()
-                while self._location < len(self._source) and (self._source[self._location].isalpha() or self._source[self._location] in '_'): self._advance()
+                while self._location < len(self._source) and (self._source[self._location].isalpha() or self._source[self._location] in '_'): self._location += 1
                 word = self._source[start_location:self._location]
                 try:
                     keyword = Keyword(word)
-                    return self._token(loc, word, KeywordToken(keyword))
+                    return Token(self._line, self._col, word, KeywordToken(keyword))
                 except ValueError:
                     try:
                         symbol = Symbol(word)
-                        return self._token(loc, word, SymbolToken(symbol))
+                        return Token(self._line, self._col, word, SymbolToken(symbol))
                     except ValueError:
-                        return self._token(loc, word, IdentifierToken(word))
+                        return Token(self._line, self._col, word, IdentifierToken(word))
             case '"':
-                # TODO: Proper escaping
-                self._advance()
+                # TODO: Escaping
+                self._location += 1
                 start_location = self._location
-                loc = self._loc()
                 escaping = False
                 while self._location < len(self._source) and (self._source[self._location] != '"' or escaping):
                     escaping = self._source[self._location] == '\\' if not escaping else False
-                    self._advance()
+                    self._location += 1
                 string = self._source[start_location:self._location].encode('utf-8').decode('unicode_escape')
-                self._advance()
-                return self._token(loc, self._source[start_location-1:self._location], StringToken(string))
+                self._location += 1
+                return Token(self._line, self._col, self._source[start_location-1:self._location], StringToken(string))
             # TODO: Make a proper Trie for this.
             case '|' if self._location < len(self._source)-1 and self._source[self._location+1] == '|':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dpipe))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dpipe))
             case '&' if self._location < len(self._source)-1 and self._source[self._location+1] == '&':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dampersand))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dampersand))
             case '*' if self._location < len(self._source)-1 and self._source[self._location+1] == '*':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dasterisk))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dasterisk))
             case '-' if self._location < len(self._source)-1 and self._source[self._location+1] == '>':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Arrow))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Arrow))
             case '>' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.GreaterEqual))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.GreaterEqual))
             case '<' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.LesserEqual))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.LesserEqual))
             case '=' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.Dequal))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.Dequal))
             case '=' if self._location < len(self._source)-1 and self._source[self._location+1] == '>':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.EqualArrow))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.EqualArrow))
            case '!' if self._location < len(self._source)-1 and self._source[self._location+1] == '=':
-                loc = self._loc()
-                self._advance(); self._advance()
-                return self._token(loc, self._source[self._location-2:self._location], SymbolToken(Symbol.NotEqual))
+                self._location += 2
+                return Token(self._line, self._col, self._source[self._location-2:self._location], SymbolToken(Symbol.NotEqual))
             case c if c in Symbol._value2member_map_:
-                loc = self._loc()
-                self._advance()
-                return self._token(loc, self._source[self._location-1], SymbolToken(Symbol(c)))
+                self._location += 1
+                return Token(self._line, self._col, self._source[self._location-1], SymbolToken(Symbol(c)))
             case _:
-                raise SyntaxError(f"{self._loc()}: Unknown character: '{c}'")
+                assert False, ("Unimplemented", c, self._location)
         assert False, "Unreachable"

     def peek_token(self) -> Token:
@@ -122,12 +108,12 @@ class Lexer:
     def assert_tokenkind(self, kind: type) -> Token:
         token = self.next_token()
-        if not isinstance(token.contents, kind): raise SyntaxError(f"{token.loc}: Expected {kind} but got {token.contents}!")
+        assert isinstance(token.contents, kind), (f"Expected {kind} but got {token.contents}!", self.next_token(), self.next_token(), self.next_token())
         return token

     def assert_token(self, expected: TokenContents) -> Token:
         token = self.next_token()
-        if token.contents != expected: raise SyntaxError(f"{token.loc}: Expected {expected} but got {token.contents}!")
+        assert token.contents == expected, (f"Expected {expected} but got {token.contents}!", self.next_token(), self.next_token())
         return token

     def check_token(self, expected: TokenContents) -> bool:
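
The right-hand lexer drops per-token Location tracking (filename, _loc, _token) and reports failures through bare asserts whose messages re-call next_token(); those extra calls only run once the assertion has already failed, but they do consume tokens, so the reported context is best-effort. Construction changes accordingly; a small usage sketch (assuming the token classes from ppp_tokens in this diff):

    lexer = Lexer("foo == 1")          # new side: no filename argument
    tok = lexer.next_token()           # Token(line, col, "foo", IdentifierToken("foo"))
    lexer.assert_token(SymbolToken(Symbol.Dequal))  # AssertionError on mismatch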

ppp_object.py (View File)

@@ -1,9 +1,11 @@
+# This file exists because I wanted to keep ppp_stdlib.py and ppp_interpreter.py seperate but they both rely on this one class.
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Callable, Dict, List as List_, Tuple as Tuple_
+from typing import Callable, Dict, List as List_, Tuple as Tuple_, Union as Union_

 from ppp_ast import Statement
-from ppp_types import ArrayType, EnumType, FunctionType, ListType, StructType, TupleType, Type, Int as IntType, Str as StrType, Bool as BoolType, Void as VoidType, TypeType
+from ppp_types import ArrayType, EnumType, FunctionType, ListType, ReturnType, StructType, TupleType, Type, Int as IntType, Str as StrType, Bool as BoolType, Void as VoidType, TypeType

 class Object(ABC):
     @abstractmethod
@@ -66,6 +68,13 @@ class Function(Object):
     def get_type(self) -> Type: return self.type

+@dataclass
+class Return(Object):
+    type: ReturnType
+    value: Object
+
+    def get_type(self) -> Type: return self.type
+
 @dataclass
 class EnumValue(Object):
     type: EnumType
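
Return is the runtime counterpart of the new AST node: the evaluated value travels upward tagged with a ReturnType until interpret_statements unwraps it into a ReturnResult. For illustration (a sketch; it assumes the int primitive renders as 'int'):

    value = Int(42)
    wrapped = Return(ReturnType(value.get_type()), value)
    assert isinstance(wrapped.get_type(), ReturnType)
    # wrapped.get_type().represent() would read "return<int>" here.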

ppp_parser.py (View File)

@@ -23,10 +23,21 @@ def parse_type_primary(lexer: Lexer) -> TypeExpression:
     if lexer.take_token(SymbolToken(Symbol.Open)):
         if lexer.take_token(SymbolToken(Symbol.Close)): return TupleTypeExpr([])
-        types: List[TypeExpression] = [parse_type(lexer)]
+        def parse_union(lexer: Lexer) -> TypeExpression:
+            union_types: List[TypeExpression] = [parse_type(lexer)]
+            while lexer.take_token(SymbolToken(Symbol.Pipe)):
+                union_types.append(parse_type(lexer))
+            if len(union_types) == 1:
+                return union_types[0]
+            return UnionTypeExpr(union_types)
+        types: List[TypeExpression] = [parse_union(lexer)]
         while lexer.take_token(SymbolToken(Symbol.Comma)):
-            types.append(parse_type(lexer))
+            types.append(parse_union(lexer))
         lexer.assert_token(SymbolToken(Symbol.Close))
-        base_type = TupleTypeExpr(types)
+        if len(types) == 1 and isinstance(types[0], UnionTypeExpr):
+            base_type = types[0]
+        else:
+            base_type = TupleTypeExpr(types)
     elif lexer.take_token(SymbolToken(Symbol.OpenSquare)):
         type = parse_type(lexer)
@@ -108,25 +119,21 @@ def parse_primary(lexer: Lexer) -> Expression:
         else:
             base_expression = elements[0]
     elif lexer.take_token(SymbolToken(Symbol.OpenSquare)):
-        lexer.assert_token(SymbolToken(Symbol.Colon))
-        element_type = parse_type(lexer)
         if lexer.take_token(SymbolToken(Symbol.CloseSquare)):
-            base_expression = Array(element_type, [])
+            base_expression = Array([])
         else:
-            lexer.assert_token(SymbolToken(Symbol.Comma))
             expressions: List[Expression] = [parse_expression(lexer)]
             if lexer.take_token(KeywordToken(Keyword.For)):
                 variable = parse_identifier(lexer) # TODO: Pattern matching
                 lexer.assert_token(KeywordToken(Keyword.In))
                 expression = parse_expression(lexer)
                 lexer.assert_token(SymbolToken(Symbol.CloseSquare))
-                base_expression = LoopComprehension(element_type, expressions[0], variable, expression)
+                base_expression = LoopComprehension(expressions[0], variable, expression)
             else:
                 while lexer.take_token(SymbolToken(Symbol.Comma)):
                     expressions.append(parse_expression(lexer))
                 lexer.assert_token(SymbolToken(Symbol.CloseSquare))
-                base_expression = Array(element_type, expressions)
+                base_expression = Array(expressions)
     elif lexer.check_tokenkind(StringToken):
         base_expression = String(parse_string(lexer))
     elif lexer.check_tokenkind(NumberToken):
@@ -134,34 +141,13 @@ def parse_primary(lexer: Lexer) -> Expression:
     else:
         base_expression = Variable(parse_identifier(lexer))

-    while (token := lexer.take_tokens(SymbolToken(Symbol.Open), SymbolToken(Symbol.OpenSquare), SymbolToken(Symbol.Dot))):
+    while (token := lexer.take_tokens(SymbolToken(Symbol.Open), SymbolToken(Symbol.OpenSquare), SymbolToken(Symbol.Dot), SymbolToken(Symbol.OpenCurly))):
         match token.contents:
             case SymbolToken(symbol):
                 match symbol:
                     case Symbol.Dot:
-                        next_token = lexer.next_token()
-                        match next_token.contents:
-                            case IdentifierToken(identifier=field):
-                                base_expression = FieldAccess(base_expression, field)
-                            case SymbolToken(symbol=symbol):
-                                match symbol:
-                                    case Symbol.OpenCurly:
-                                        if lexer.take_token(SymbolToken(Symbol.CloseCurly)):
-                                            base_expression = StructInstantiation(base_expression, [])
-                                        else:
-                                            def parse_argument() -> Tuple[str, Expression]:
-                                                parameter = parse_identifier(lexer)
-                                                lexer.assert_token(SymbolToken(Symbol.Equal))
-                                                return (parameter, parse_expression(lexer))
-                                            struct_arguments: List[Tuple[str, Expression]] = [parse_argument()]
-                                            while lexer.take_token(SymbolToken(Symbol.Comma)): struct_arguments.append(parse_argument())
-                                            lexer.assert_token(SymbolToken(Symbol.CloseCurly))
-                                            base_expression = StructInstantiation(base_expression, struct_arguments)
-                                    case _:
-                                        raise SyntaxError(f"{next_token.loc}: Unexpected symbol: {repr(str(symbol))}")
-                            case _:
-                                raise SyntaxError(f"{next_token.loc}: Unexpected: {next_token.contents}")
+                        field = parse_identifier(lexer)
+                        base_expression = FieldAccess(base_expression, field)
                     case Symbol.Open:
                         if lexer.take_token(SymbolToken(Symbol.Close)):
                             base_expression = FunctionCall(base_expression, [])
@@ -175,6 +161,19 @@ def parse_primary(lexer: Lexer) -> Expression:
                         index = parse_expression(lexer)
                         lexer.assert_token(SymbolToken(Symbol.CloseSquare))
                         base_expression = ArrayAccess(base_expression, index)
+                    case Symbol.OpenCurly:
+                        if lexer.take_token(SymbolToken(Symbol.CloseCurly)):
+                            base_expression = StructInstantiation(base_expression, [])
+                        else:
+                            def parse_argument() -> Tuple[str, Expression]:
+                                parameter = parse_identifier(lexer)
+                                lexer.assert_token(SymbolToken(Symbol.Equal))
+                                return (parameter, parse_expression(lexer))
+                            struct_arguments: List[Tuple[str, Expression]] = [parse_argument()]
+                            while lexer.take_token(SymbolToken(Symbol.Comma)): struct_arguments.append(parse_argument())
+                            lexer.assert_token(SymbolToken(Symbol.CloseCurly))
+                            base_expression = StructInstantiation(base_expression, struct_arguments)
                     case _: assert False, ("Unimplemented", symbol)
             case _: assert False, ("Unimplemented", token)
@@ -185,6 +184,7 @@ def parse_unary(lexer: Lexer) -> Expression:
     if lexer.take_token(SymbolToken(Symbol.Exclamation)): return Not(parse_unary(lexer))
     if lexer.take_token(SymbolToken(Symbol.Plus)): return UnaryPlus(parse_unary(lexer))
     if lexer.take_token(SymbolToken(Symbol.Dash)): return UnaryMinus(parse_unary(lexer))
+    if lexer.take_token(KeywordToken(Keyword.Return)): return Return(parse_unary(lexer))
     return parse_primary(lexer)

 Precedence = Dict[Symbol, Callable[[Expression, Expression], Expression]]
@@ -220,6 +220,7 @@ def parse_ternary(lexer: Lexer) -> Expression:
     return Ternary(expression, if_true, if_false)

 def parse_expression(lexer: Lexer) -> Expression:
+    if lexer.take_token(KeywordToken(Keyword.Return)): return Return(parse_expression(lexer))
     if lexer.take_token(KeywordToken(Keyword.Lambda)):
         parameters: List[TypeDeclaration]
         if lexer.take_token(SymbolToken(Symbol.EqualArrow)):
@@ -300,10 +301,6 @@ def parse_statement(lexer: Lexer) -> Statement:
     elif lexer.take_token(KeywordToken(Keyword.Continue)):
         lexer.assert_token(SymbolToken(Symbol.Semicolon))
         return ContinueStatement()
-    elif lexer.take_token(KeywordToken(Keyword.Return)):
-        expression = parse_expression(lexer)
-        lexer.assert_token(SymbolToken(Symbol.Semicolon))
-        return ReturnStatement(expression)
     elif lexer.take_token(KeywordToken(Keyword.Do)):
         body = parse_statement(lexer)
         condition: Optional[Expression] = None
@@ -313,6 +310,7 @@ def parse_statement(lexer: Lexer) -> Statement:
         return DoWhileStatement(body, condition)
     elif lexer.take_token(KeywordToken(Keyword.Match)):
         value = parse_expression(lexer)
+        lexer.assert_token(KeywordToken(Keyword.In)) # to prevent it from parsing it as a struct instantiation
         lexer.assert_token(SymbolToken(Symbol.OpenCurly))

         cases: List[Tuple[Expression, Statement]] = []
@@ -331,7 +329,7 @@ def parse_statement(lexer: Lexer) -> Statement:
         body = parse_statement(lexer)
         return ForLoop(variable, expression, body)
     elif lexer.take_token(KeywordToken(Keyword.Import)):
-        file = parse_string(lexer)
+        file = parse_expression(lexer)
         lexer.assert_token(SymbolToken(Symbol.Semicolon))
         return Import(file)
     elif lexer.take_token(KeywordToken(Keyword.Type)):
@@ -343,10 +341,8 @@ def parse_statement(lexer: Lexer) -> Statement:
     elif lexer.take_token(KeywordToken(Keyword.Defer)):
         statement = parse_statement(lexer)
         return DeferStatement(statement)
-    elif lexer.check_tokenkind(KeywordToken) and not lexer.check_token(KeywordToken(Keyword.Lambda)): # TODO: Maybe use '\' for lambda instead of a keyword
-        token = lexer.next_token()
-        assert isinstance(token.contents, KeywordToken)
-        raise SyntaxError(f"{token.loc}: Unexpected keyword: '{token.contents.keyword}'")
+    elif lexer.check_tokenkind(KeywordToken) and not lexer.check_tokens(KeywordToken(Keyword.Return), KeywordToken(Keyword.Lambda)):
+        assert False, ("Unimplemented", lexer.next_token(), lexer.next_token(), lexer.next_token())
     elif lexer.take_token(SymbolToken(Symbol.OpenCurly)):
         statements: List[Statement] = []
         while not lexer.take_token(SymbolToken(Symbol.CloseCurly)):
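
Surface syntax on this side therefore changes in several places: union types inside parentheses via `|`, array literals without the old `[:type, ...]` prefix, a mandatory `in` after `match`'s scrutinee, `import` taking an arbitrary expression, and `return` parsed as a prefix expression. A hypothetical PPP fragment exercising these rules (extrapolated from the parser above, not taken from the repository):

    type number_or_text = (int|str);  # parse_union applies inside parentheses
    xs = [1, 2, 3];                   # element type is now inferred
    import lib_path;                  # any expression evaluating to a str works
    match len(xs) in {                # 'in' avoids the struct-instantiation ambiguity
        ...
    }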

ppp_stdlib.py (View File)

@@ -2,7 +2,7 @@ from typing import Callable, Dict, List, Tuple
 from ppp_ast import Statements
 from ppp_object import Bool, EnumValue, Int, Object, Function, Str, TypeObject, Void, List as ListObject
-from ppp_types import Bool as BoolType, FunctionType, GenericType, Int as IntType, Str as StrType, Type, TypeType, VariableType, Void as VoidType, Object as ObjectType, ListType
+from ppp_types import Bool as BoolType, FunctionType, GenericType, Int as IntType, Str as StrType, Type, TypeType, VariableType, Void as VoidType, Object as ObjectType, UnionType, ListType

 def PythonFunction(name: str, parameters: List[Tuple[str, Type]], return_type: Type, func: Callable[..., Object]) -> Object:
@@ -41,14 +41,7 @@ def len_impl(list_: Object) -> Object:
         case _: assert False, ("Unimplemented", list_)
     assert False

-# TODO: Use polymorphism to make this work for both list<T> and str
-Len = PythonFunction("len", [('list', ListType(VariableType("")))], IntType, len_impl)
-
-def str_len_impl(str_: Object) -> Object:
-    assert isinstance(str_, Str)
-    return Int(len(str_.str))
-
-StrLen = PythonFunction("strlen", [('string', StrType)], IntType, str_len_impl)
+Len = PythonFunction("len", [('list', UnionType([ListType(VariableType("")), StrType]))], IntType, len_impl)

 def str_to_int_impl(str_: Object) -> Object:
     assert isinstance(str_, Str)
@@ -107,7 +100,6 @@ variables: Dict[str, Object] = {
     'debug_print': DebugPrint,
     'read': Read,
     'len': Len,
-    'str_len': StrLen,
     'str_to_int': StrToInt,
     'none': NoneObj,
     'range': Range,
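
With the parameter typed as a union of list<T> and str, the single len builtin now covers what previously required both len and str_len, and the union arm of is_subtype_of (see ppp_types below) lets either argument type-check. Roughly, at the PPP level (hypothetical calls):

    len([1, 2, 3]);   # matches the ListType(VariableType("")) member
    len("hello");     # matches the StrType member; str_len is gone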

ppp_tokens.py (View File)

@@ -1,5 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum
+
+from typing import List, Literal, Tuple, Union

 class Keyword(Enum):
     Enum = 'enum'
@@ -23,8 +25,6 @@ class Keyword(Enum):
     Type = 'type'
     Defer = 'defer'

-    def __str__(self) -> str: return self._value_
-
 class Symbol(Enum):
     Open = '('
     Close = ')'
@@ -62,8 +62,6 @@ class Symbol(Enum):
     Tilde = '~'
     Carot = '^'

-    def __str__(self) -> str: return self._value_
-
 @dataclass
 class KeywordToken:
     keyword: Keyword
@@ -90,19 +88,18 @@ class SymbolToken:
 @dataclass
 class EofToken: pass

-TokenContents = KeywordToken | IdentifierToken | NumberToken | StringToken | SymbolToken | EofToken
-
-@dataclass
-class Location:
-    file: str
-    line: int
-    col: int
-
-    def __repr__(self) -> str:
-        return f"{self.file}:{self.line}:{self.col+1}"
+TokenContents = Union[
+    KeywordToken,
+    IdentifierToken,
+    NumberToken,
+    StringToken,
+    SymbolToken,
+    EofToken
+]

 @dataclass
 class Token:
-    loc: Location
+    line: int
+    col: int
     value: str
     contents: TokenContents
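
Token now stores line and col inline; the Location dataclass (file, line, col, plus its file:line:col repr) exists only on the old side. A quick construction sketch for the new shape (fields exactly as declared above):

    tok = Token(line=1, col=5, value="=", contents=SymbolToken(Symbol.Equal))
    # Old-side equivalent: Token(Location(file, 1, 5), "=", SymbolToken(Symbol.Equal))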

ppp_types.py (View File)

@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Union
 import sys

 sys.setrecursionlimit(1000)
@@ -40,6 +40,10 @@ class Type(ABC):
             case VariableType(self_name), VariableType(other_name):
                 return self_name == other_name
             case _, VariableType(""): return True
+            case type, UnionType(types):
+                for union_type in types:
+                    if type.is_subtype_of(union_type): return True
+                return False
             case BoolType(), BoolType(): return True
             case type, ObjectType(): return True
             case type_a, type_b if type_a.__class__ != type_b.__class__: return False
@@ -160,10 +164,41 @@ class FunctionType(Type):
         is_new_return_type, new_return_type = self.return_type.new_fill(types, stack+[id(self)])
         return (is_new_arguments or is_new_return_type, FunctionType(new_arguments, new_return_type))

+@dataclass
+class UnionType(Type):
+    types: List[Type]
+
+    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
+        if id(self) in stack: return self
+        self.types = [type.fill(types, stack+[id(self)]) for type in self.types]
+        return self
+
+    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
+        is_new, new_types = self.new_fill_list(self.types, types, stack)
+        return (is_new, UnionType(new_types))
+
+    def represent(self) -> str: return '('+'|'.join([type.represent() for type in self.types])+')'
+
 class ObjectType(Primitive):
     def represent(self) -> str: return 'object'
 Object = ObjectType()

+@dataclass
+class ReturnType(Type):
+    type: Type
+
+    def represent(self) -> str: return f"return<{self.type.represent()}>"
+
+    def fill(self, types: Dict[str, Type], stack: List[int]) -> Type:
+        if id(self) in stack: return self
+        self.type = self.type.fill(types, stack+[id(self)])
+        return self
+
+    def new_fill(self, types: Dict[str, Type], stack: List[int]) -> Tuple[bool, Type]:
+        assert id(self) not in stack
+        is_new, new_type = self.type.new_fill(types, stack+[id(self)])
+        return (is_new, ReturnType(new_type))
+
 num_expressions: int = 0

 @dataclass
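
The new subtyping arm reads: T is a subtype of a union when it is a subtype of any member, and a mismatch returns False immediately. Since ppp_types exposes its primitives as instances (as with Object = ObjectType() above), the rule can be exercised like this (a sketch; Int, Str, and Void name the module's primitive instances):

    int_or_str = UnionType([Int, Str])
    assert Int.is_subtype_of(int_or_str)        # matches the first member
    assert not Void.is_subtype_of(int_or_str)   # in neither member, so False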