lexer: add keywords ("let") and their special handling

This commit is contained in:
Antoine Viallon 2023-05-23 00:50:40 +02:00
parent 109a8aad13
commit 81316ead45
Signed by: aviallon
GPG key ID: D126B13AB555E16F

View file

@@ -4,10 +4,12 @@ import collections.abc
import enum
import re
from dataclasses import dataclass, field
from typing import cast
from .logger import Logger
from .source import SourceLocation, Location
from .typechecking import typecheck
from .utils import implies
logger = Logger(__name__)
@@ -34,9 +36,15 @@ class Tokens(enum.Enum):
# Closing parenthesis ")".
Parens_Right = re.compile(r"\)")
# Opening brace "{".
Brace_Left = re.compile(r"\{")
# Closing brace "}" (no escape needed outside a character class).
Brace_Right = re.compile(r"}")
# Keyword "let"; the \b word boundaries stop it from matching inside
# a longer identifier such as "letter".
KwLet = re.compile(r"\blet\b")
# Identifier: a letter or underscore followed by letters, digits, underscores.
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
# Single "=" sign.
Equal = re.compile(r"=")
# ":" separator.
Colon = re.compile(r":")
# ";" statement terminator.
Semicolon = re.compile(r";")
# Line break. NOTE(review): re.MULTILINE only changes ^/$ semantics and this
# pattern has neither anchor — presumably redundant; confirm before removing.
Newline = re.compile(r"\n", flags=re.MULTILINE)
# End of input: \Z matches only at the very end of the string.
EOF = re.compile(r"\Z")
# Horizontal whitespace (spaces and tabs); newlines are a separate token.
Blank = re.compile(r"[ \t]+")
@@ -45,6 +53,9 @@ class Tokens(enum.Enum):
def __bool__(self) -> bool:
    """Make every token kind truthy.

    NOTE(review): enum members are already truthy by default; presumably
    this override guards some truthiness test elsewhere in the lexer
    (e.g. on kinds whose pattern can match the empty string, like EOF) —
    confirm the original intent before relying on it.
    """
    return True
def is_keyword(self) -> bool:
    """Return True when this token kind is a reserved language keyword."""
    # Single tuple of keyword kinds; extend here as new keywords are added.
    keyword_kinds = (Tokens.KwLet,)
    return self in keyword_kinds
class Lexer(collections.abc.Sequence):
def __init__(self, data: str):
@@ -81,15 +92,20 @@ class Lexer(collections.abc.Sequence):
source=self.data),
value=""
)
token_kind: Tokens
for token_kind in list(Tokens):
if token_kind == Tokens.Unknown:
continue
regex: re.Pattern = token_kind.value
regex = cast(re.Pattern, token_kind.value)
match = regex.match(self.data, self.begin)
if match is not None:
logger.trace(f"Got match: {match}")
result = match.group(0)
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
if not implies(best_result.kind.is_keyword(), token_kind.is_keyword()):
logger.trace(
f"Best match is a keyword ({best_result}) and current match ({token_kind}) is not, skipping")
continue
loc = SourceLocation(
begin=Location(line=self.line, character=self.character),
end=Location(line=self.line, character=self.character + len(result))