I plan to make a few changes to my language, and I will update these files afterwards. I am moving them because I don't want them polluting the main directory; I probably should have done this to begin with, but better late than never. Here is the lexer as it currently stands:
import "ppp_tokens.ppp";

struct Lexer {
	source: str,
	location: int,
	line: int,
	col: int,
	peeked_token: OptionalToken
}

func new_lexer(source: str) -> Lexer {
	return Lexer{
		source = source,
		location = 0,
		line = 1,
		col = 0,
		peeked_token = OptionalToken.None
	};
}

func lexer_from_file(path: str) -> Lexer return new_lexer(read(path));

func is_space(char: str) -> bool {
	return char == " " || char == "\t" || char == "\n";
}

func is_digit(char: str) -> bool {
	return char == "0" || char == "1" || char == "2" || char == "3" || char == "4" || char == "5" || char == "6" || char == "7" || char == "8" || char == "9";
}

func is_alpha(char: str) -> bool {
	return char == "a" || char == "b" || char == "c" || char == "d" || char == "e" || char == "f" || char == "g" || char == "h" || char == "i" || char == "j" || char == "k" || char == "l" || char == "m" || char == "n" || char == "o" || char == "p" || char == "q" || char == "r" || char == "s" || char == "t" || char == "u" || char == "v" || char == "w" || char == "x" || char == "y" || char == "z" || char == "A" || char == "B" || char == "C" || char == "D" || char == "E" || char == "F" || char == "G" || char == "H" || char == "I" || char == "J" || char == "K" || char == "L" || char == "M" || char == "N" || char == "O" || char == "P" || char == "Q" || char == "R" || char == "S" || char == "T" || char == "U" || char == "V" || char == "W" || char == "X" || char == "Y" || char == "Z" || char == "_";
}

func lexer_next_token(lexer: Lexer) -> Token {
	match lexer.peeked_token in {
		case Some(token) do {
			lexer.peeked_token = OptionalToken.None;
			return token;
		}
	}

	while lexer.location < len(lexer.source) && is_space(lexer.source[lexer.location]) do {
		if lexer.source[lexer.location] == "\n" do {
			lexer.line = lexer.line + 1;
			lexer.col = 0;
		} else {
			lexer.col = lexer.col + 1;
		}
		lexer.location = lexer.location + 1;
	}

	if lexer.location >= len(lexer.source) return Token{line=lexer.line, col=lexer.col, value="\0", contents=TokenContents.Eof};

	start_line: int = lexer.line;
	start_col: int = lexer.col;

	if is_digit(lexer.source[lexer.location]) do {
		number_str: str = "";
		while lexer.location < len(lexer.source) && is_digit(lexer.source[lexer.location]) do {
			number_str = number_str + lexer.source[lexer.location];
			lexer.location = lexer.location + 1;
			lexer.col = lexer.col + 1;
		}
		number: int = str_to_int(number_str);
		return Token{line=start_line, col=start_col, value=number_str, contents=TokenContents.Number(number)};
	} else if is_alpha(lexer.source[lexer.location]) do {
		word_str: str = "";
		while lexer.location < len(lexer.source) && is_alpha(lexer.source[lexer.location]) do {
			word_str = word_str + lexer.source[lexer.location];
			lexer.location = lexer.location + 1;
			lexer.col = lexer.col + 1;
		}
		match keyword_from_str(word_str) in {
			case Some(keyword) return Token{line=start_line, col=start_col, value=word_str, contents=TokenContents.Keyword(keyword)};
			case None return Token{line=start_line, col=start_col, value=word_str, contents=TokenContents.Identifier(word_str)};
		}
		assert false, "Identifier";
	} else if lexer.source[lexer.location] == "\"" do {
		lexer.location = lexer.location + 1;
		lexer.col = lexer.col + 1;
		string_str: str = "";
		escaping: bool = false;
		while lexer.location < len(lexer.source) && (lexer.source[lexer.location] != "\"" || escaping) do {
			escaping = escaping? false: lexer.source[lexer.location] == "\\";
			string_str = string_str + lexer.source[lexer.location];
			if lexer.source[lexer.location] == "\n" do {
				lexer.line = lexer.line + 1;
				lexer.col = 0;
			} else {
				lexer.col = lexer.col + 1;
			}
			lexer.location = lexer.location + 1;
		}
		lexer.location = lexer.location + 1;
		lexer.col = lexer.col + 1;
		return Token{line=start_line, col=start_col, value="\""+string_str+"\"", contents=TokenContents.String(string_str)};
	} else if lexer.source[lexer.location] == "|" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "|" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="||", contents=TokenContents.Symbol(Symbol.Dpipe)};
	} else if lexer.source[lexer.location] == "&" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "&" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="&&", contents=TokenContents.Symbol(Symbol.Dampersand)};
	} else if lexer.source[lexer.location] == "*" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "*" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="**", contents=TokenContents.Symbol(Symbol.Dasterisk)};
	} else if lexer.source[lexer.location] == "-" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == ">" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="->", contents=TokenContents.Symbol(Symbol.Arrow)};
	} else if lexer.source[lexer.location] == ">" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value=">=", contents=TokenContents.Symbol(Symbol.GreaterEqual)};
	} else if lexer.source[lexer.location] == "<" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="<=", contents=TokenContents.Symbol(Symbol.LesserEqual)};
	} else if lexer.source[lexer.location] == "=" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="==", contents=TokenContents.Symbol(Symbol.Dequal)};
	} else if lexer.source[lexer.location] == "!" && lexer.location < len(lexer.source)-1 && lexer.source[lexer.location+1] == "=" do {
		lexer.location = lexer.location + 2;
		lexer.col = lexer.col + 2;
		return Token{line=start_line, col=start_col, value="!=", contents=TokenContents.Symbol(Symbol.NotEqual)};
	} else {
		match symbol_from_str(lexer.source[lexer.location]) in {
			case Some(symbol) do {
				lexer.location = lexer.location + 1;
				lexer.col = lexer.col + 1;
				return Token{line=start_line, col=start_col, value=lexer.source[lexer.location-1], contents=TokenContents.Symbol(symbol)};
			}
			case None assert false, "Unimplemented, '%s'" % lexer.source[lexer.location];
		}
	}
}

func lexer_peek_token(lexer: Lexer) -> Token {
	match lexer.peeked_token in {
		case Some(token) return token;
		case None do {
			token: Token = lexer_next_token(lexer);
			lexer.peeked_token = OptionalToken.Some(token);
			return token;
		}
	}
}

func lexer_check_token(lexer: Lexer, expected: TokenContents) -> bool {
	token: Token = lexer_peek_token(lexer);
	return token.contents == expected;
}

func lexer_take_token(lexer: Lexer, token: TokenContents) -> OptionalToken {
	if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
	return OptionalToken.None;
}

func lexer_take_tokens(lexer: Lexer, tokens: TokenContents[]) -> OptionalToken {
	for token in tokens do {
		if lexer_check_token(lexer, token) return OptionalToken.Some(lexer_next_token(lexer));
	}
	return OptionalToken.None;
}

func lexer_assert_token(lexer: Lexer, expected: TokenContents) -> Token {
	token: Token = lexer_next_token(lexer);
	assert token.contents == expected, "Expected %s but got %s!" % (token_contents_to_str(expected), token_to_str(token));
	return token;
}

func lexer_check_tokens(lexer: Lexer, tokens: TokenContents[]) -> bool {
	for token in tokens if lexer_check_token(lexer, token) return true;
	return false;
}
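
For context, driving the lexer looks something like this. This is only an untested sketch: the file name is arbitrary and print stands in for whatever output routine is available; lexer_from_file, lexer_next_token, token_to_str, and TokenContents.Eof are the real names from this file and ppp_tokens.ppp.

lexer: Lexer = lexer_from_file("example.ppp");
token: Token = lexer_next_token(lexer);
while token.contents != TokenContents.Eof do {
	print(token_to_str(token));
	token = lexer_next_token(lexer);
}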