lexer: add keywords ("let") and their special handling

This commit is contained in:
Antoine Viallon 2023-05-23 00:50:40 +02:00
parent 109a8aad13
commit 81316ead45
Signed by: aviallon
GPG key ID: D126B13AB555E16F

View file

@ -4,10 +4,12 @@ import collections.abc
import enum import enum
import re import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import cast
from .logger import Logger from .logger import Logger
from .source import SourceLocation, Location from .source import SourceLocation, Location
from .typechecking import typecheck from .typechecking import typecheck
from .utils import implies
logger = Logger(__name__) logger = Logger(__name__)
@ -34,9 +36,15 @@ class Tokens(enum.Enum):
Parens_Right = re.compile(r"\)") Parens_Right = re.compile(r"\)")
Brace_Left = re.compile(r"\{") Brace_Left = re.compile(r"\{")
Brace_Right = re.compile(r"}") Brace_Right = re.compile(r"}")
KwLet = re.compile(r"\blet\b")
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*") Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
Equal = re.compile(r"=") Equal = re.compile(r"=")
Colon = re.compile(r":")
Semicolon = re.compile(r";") Semicolon = re.compile(r";")
Newline = re.compile(r"\n", flags=re.MULTILINE) Newline = re.compile(r"\n", flags=re.MULTILINE)
EOF = re.compile(r"\Z") EOF = re.compile(r"\Z")
Blank = re.compile(r"[ \t]+") Blank = re.compile(r"[ \t]+")
@ -45,6 +53,9 @@ class Tokens(enum.Enum):
def __bool__(self):
    """Every token kind is truthy.

    Makes the intent explicit so callers can rely on ``if token_kind:``
    regardless of which member (including EOF/Unknown) they hold.
    """
    return True
def is_keyword(self) -> bool:
    """Return True when this token kind is a reserved keyword.

    Currently ``let`` (``Tokens.KwLet``) is the only keyword; enum
    members are singletons, so an identity check is equivalent to the
    membership test it replaces.
    """
    return self is Tokens.KwLet
class Lexer(collections.abc.Sequence): class Lexer(collections.abc.Sequence):
def __init__(self, data: str): def __init__(self, data: str):
@ -81,15 +92,20 @@ class Lexer(collections.abc.Sequence):
source=self.data), source=self.data),
value="" value=""
) )
token_kind: Tokens
for token_kind in list(Tokens): for token_kind in list(Tokens):
if token_kind == Tokens.Unknown: if token_kind == Tokens.Unknown:
continue continue
regex: re.Pattern = token_kind.value regex = cast(re.Pattern, token_kind.value)
match = regex.match(self.data, self.begin) match = regex.match(self.data, self.begin)
if match is not None: if match is not None:
logger.trace(f"Got match: {match}") logger.trace(f"Got match: {match}")
result = match.group(0) result = match.group(0)
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value): if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
if not implies(best_result.kind.is_keyword(), token_kind.is_keyword()):
logger.trace(
f"Best match is a keyword ({best_result}) and current match ({token_kind}) is not, skipping")
continue
loc = SourceLocation( loc = SourceLocation(
begin=Location(line=self.line, character=self.character), begin=Location(line=self.line, character=self.character),
end=Location(line=self.line, character=self.character + len(result)) end=Location(line=self.line, character=self.character + len(result))