lexer: add keywords ("let") and their special handling
This commit is contained in:
parent
109a8aad13
commit
81316ead45
1 changed file with 17 additions and 1 deletion
|
|
@@ -4,10 +4,12 @@ import collections.abc
|
|||
import enum
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import cast
|
||||
|
||||
from .logger import Logger
|
||||
from .source import SourceLocation, Location
|
||||
from .typechecking import typecheck
|
||||
from .utils import implies
|
||||
|
||||
logger = Logger(__name__)
|
||||
|
||||
|
|
@@ -34,9 +36,15 @@ class Tokens(enum.Enum):
|
|||
Parens_Right = re.compile(r"\)")
|
||||
Brace_Left = re.compile(r"\{")
|
||||
Brace_Right = re.compile(r"}")
|
||||
|
||||
KwLet = re.compile(r"\blet\b")
|
||||
|
||||
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
|
||||
|
||||
Equal = re.compile(r"=")
|
||||
Colon = re.compile(r":")
|
||||
Semicolon = re.compile(r";")
|
||||
|
||||
Newline = re.compile(r"\n", flags=re.MULTILINE)
|
||||
EOF = re.compile(r"\Z")
|
||||
Blank = re.compile(r"[ \t]+")
|
||||
|
|
@@ -45,6 +53,9 @@ class Tokens(enum.Enum):
|
|||
def __bool__(self):
    # Every token kind is truthy, including Tokens.Unknown — the lexer
    # compares kinds explicitly (e.g. `best_result.kind == Tokens.Unknown`)
    # rather than relying on truthiness, so this makes boolean use of a
    # Tokens member unambiguous.
    return True
|
||||
|
||||
def is_keyword(self) -> bool:
    """Return True if this token kind is a reserved word (currently only ``let``)."""
    # Enum members are singletons, so an identity check is equivalent to
    # membership in the keyword list.
    return self is Tokens.KwLet
|
||||
|
||||
|
||||
class Lexer(collections.abc.Sequence):
|
||||
def __init__(self, data: str):
|
||||
|
|
@@ -81,15 +92,20 @@ class Lexer(collections.abc.Sequence):
|
|||
source=self.data),
|
||||
value=""
|
||||
)
|
||||
token_kind: Tokens
|
||||
for token_kind in list(Tokens):
|
||||
if token_kind == Tokens.Unknown:
|
||||
continue
|
||||
regex: re.Pattern = token_kind.value
|
||||
regex = cast(re.Pattern, token_kind.value)
|
||||
match = regex.match(self.data, self.begin)
|
||||
if match is not None:
|
||||
logger.trace(f"Got match: {match}")
|
||||
result = match.group(0)
|
||||
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
|
||||
if not implies(best_result.kind.is_keyword(), token_kind.is_keyword()):
|
||||
logger.trace(
|
||||
f"Best match is a keyword ({best_result}) and current match ({token_kind}) is not, skipping")
|
||||
continue
|
||||
loc = SourceLocation(
|
||||
begin=Location(line=self.line, character=self.character),
|
||||
end=Location(line=self.line, character=self.character + len(result))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue