lexer: add keywords ("let") and their special handling

This commit is contained in:
Antoine Viallon 2023-05-23 00:50:40 +02:00
parent 109a8aad13
commit 81316ead45
Signed by: aviallon
GPG key ID: D126B13AB555E16F

View file

@ -4,10 +4,12 @@ import collections.abc
import enum import enum
import re import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import cast
from .logger import Logger from .logger import Logger
from .source import SourceLocation, Location from .source import SourceLocation, Location
from .typechecking import typecheck from .typechecking import typecheck
from .utils import implies
logger = Logger(__name__) logger = Logger(__name__)
@ -34,9 +36,15 @@ class Tokens(enum.Enum):
Parens_Right = re.compile(r"\)") Parens_Right = re.compile(r"\)")
Brace_Left = re.compile(r"\{") Brace_Left = re.compile(r"\{")
Brace_Right = re.compile(r"}") Brace_Right = re.compile(r"}")
KwLet = re.compile(r"\blet\b")
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*") Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
Equal = re.compile(r"=") Equal = re.compile(r"=")
Colon = re.compile(r":")
Semicolon = re.compile(r";") Semicolon = re.compile(r";")
Newline = re.compile(r"\n", flags=re.MULTILINE) Newline = re.compile(r"\n", flags=re.MULTILINE)
EOF = re.compile(r"\Z") EOF = re.compile(r"\Z")
Blank = re.compile(r"[ \t]+") Blank = re.compile(r"[ \t]+")
@ -45,6 +53,9 @@ class Tokens(enum.Enum):
def __bool__(self):
    """Every token kind is truthy.

    Makes the intent explicit so callers can rely on ``if token_kind:``
    regardless of which member (including EOF/Unknown) they hold.
    """
    return True
def is_keyword(self) -> bool:
    """Return True when this token kind is a reserved keyword.

    Currently ``let`` (``Tokens.KwLet``) is the only keyword; enum
    members are singletons, so an identity check is equivalent to the
    membership test it replaces.
    """
    return self is Tokens.KwLet
class Lexer(collections.abc.Sequence): class Lexer(collections.abc.Sequence):
def __init__(self, data: str): def __init__(self, data: str):
@ -81,15 +92,20 @@ class Lexer(collections.abc.Sequence):
source=self.data), source=self.data),
value="" value=""
) )
token_kind: Tokens
for token_kind in list(Tokens): for token_kind in list(Tokens):
if token_kind == Tokens.Unknown: if token_kind == Tokens.Unknown:
continue continue
regex: re.Pattern = token_kind.value regex = cast(re.Pattern, token_kind.value)
match = regex.match(self.data, self.begin) match = regex.match(self.data, self.begin)
if match is not None: if match is not None:
logger.trace(f"Got match: {match}") logger.trace(f"Got match: {match}")
result = match.group(0) result = match.group(0)
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value): if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
if not implies(best_result.kind.is_keyword(), token_kind.is_keyword()):
logger.trace(
f"Best match is a keyword ({best_result}) and current match ({token_kind}) is not, skipping")
continue
loc = SourceLocation( loc = SourceLocation(
begin=Location(line=self.line, character=self.character), begin=Location(line=self.line, character=self.character),
end=Location(line=self.line, character=self.character + len(result)) end=Location(line=self.line, character=self.character + len(result))