lexer: add keywords ("let") and their special handling

This commit is contained in:
Antoine Viallon 2023-05-23 00:50:40 +02:00
parent 109a8aad13
commit 81316ead45
Signed by: aviallon
GPG key ID: D126B13AB555E16F

View file

@@ -4,10 +4,12 @@ import collections.abc
import enum
import re
from dataclasses import dataclass, field
from typing import cast
from .logger import Logger
from .source import SourceLocation, Location
from .typechecking import typecheck
from .utils import implies
logger = Logger(__name__)
@@ -34,9 +36,15 @@ class Tokens(enum.Enum):
# Closing parenthesis ")".
Parens_Right = re.compile(r"\)")
# Opening brace "{".
Brace_Left = re.compile(r"\{")
# Closing brace "}" (no escape needed outside a character class).
Brace_Right = re.compile(r"}")
# Keyword "let"; the \b word boundaries stop it from matching inside
# a longer identifier such as "letter".
KwLet = re.compile(r"\blet\b")
# Identifier: a letter or underscore followed by letters, digits, underscores.
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
# Single "=" sign.
Equal = re.compile(r"=")
# ":" separator.
Colon = re.compile(r":")
# ";" statement terminator.
Semicolon = re.compile(r";")
# Line break. NOTE(review): re.MULTILINE only changes ^/$ semantics and this
# pattern has neither anchor — presumably redundant; confirm before removing.
Newline = re.compile(r"\n", flags=re.MULTILINE)
# End of input: \Z matches only at the very end of the string.
EOF = re.compile(r"\Z")
# Horizontal whitespace (spaces and tabs); newlines are a separate token.
Blank = re.compile(r"[ \t]+")
@@ -45,6 +53,9 @@ class Tokens(enum.Enum):
def __bool__(self) -> bool:
    """Make every token kind truthy.

    NOTE(review): enum members are already truthy by default; presumably
    this override guards some truthiness test elsewhere in the lexer
    (e.g. on kinds whose pattern can match the empty string, like EOF) —
    confirm the original intent before relying on it.
    """
    return True
def is_keyword(self) -> bool:
    """Return True when this token kind is a reserved language keyword."""
    # Single tuple of keyword kinds; extend here as new keywords are added.
    keyword_kinds = (Tokens.KwLet,)
    return self in keyword_kinds
class Lexer(collections.abc.Sequence):
def __init__(self, data: str):
@@ -81,15 +92,20 @@ class Lexer(collections.abc.Sequence):
source=self.data),
value=""
)
token_kind: Tokens
for token_kind in list(Tokens):
if token_kind == Tokens.Unknown:
continue
regex: re.Pattern = token_kind.value
regex = cast(re.Pattern, token_kind.value)
match = regex.match(self.data, self.begin)
if match is not None:
logger.trace(f"Got match: {match}")
result = match.group(0)
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
if not implies(best_result.kind.is_keyword(), token_kind.is_keyword()):
logger.trace(
f"Best match is a keyword ({best_result}) and current match ({token_kind}) is not, skipping")
continue
loc = SourceLocation(
begin=Location(line=self.line, character=self.character),
end=Location(line=self.line, character=self.character + len(result))