lexer: add keywords ("let") and their special handling
This commit is contained in:
parent
109a8aad13
commit
81316ead45
1 changed files with 17 additions and 1 deletions
|
|
@ -4,10 +4,12 @@ import collections.abc
|
||||||
import enum
|
import enum
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
from .logger import Logger
|
from .logger import Logger
|
||||||
from .source import SourceLocation, Location
|
from .source import SourceLocation, Location
|
||||||
from .typechecking import typecheck
|
from .typechecking import typecheck
|
||||||
|
from .utils import implies
|
||||||
|
|
||||||
logger = Logger(__name__)
|
logger = Logger(__name__)
|
||||||
|
|
||||||
|
|
@ -34,9 +36,15 @@ class Tokens(enum.Enum):
|
||||||
Parens_Right = re.compile(r"\)")
|
Parens_Right = re.compile(r"\)")
|
||||||
Brace_Left = re.compile(r"\{")
|
Brace_Left = re.compile(r"\{")
|
||||||
Brace_Right = re.compile(r"}")
|
Brace_Right = re.compile(r"}")
|
||||||
|
|
||||||
|
KwLet = re.compile(r"\blet\b")
|
||||||
|
|
||||||
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
|
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
|
||||||
|
|
||||||
Equal = re.compile(r"=")
|
Equal = re.compile(r"=")
|
||||||
|
Colon = re.compile(r":")
|
||||||
Semicolon = re.compile(r";")
|
Semicolon = re.compile(r";")
|
||||||
|
|
||||||
Newline = re.compile(r"\n", flags=re.MULTILINE)
|
Newline = re.compile(r"\n", flags=re.MULTILINE)
|
||||||
EOF = re.compile(r"\Z")
|
EOF = re.compile(r"\Z")
|
||||||
Blank = re.compile(r"[ \t]+")
|
Blank = re.compile(r"[ \t]+")
|
||||||
|
|
@ -45,6 +53,9 @@ class Tokens(enum.Enum):
|
||||||
def __bool__(self):
|
def __bool__(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def is_keyword(self) -> bool:
|
||||||
|
return self in [Tokens.KwLet]
|
||||||
|
|
||||||
|
|
||||||
class Lexer(collections.abc.Sequence):
|
class Lexer(collections.abc.Sequence):
|
||||||
def __init__(self, data: str):
|
def __init__(self, data: str):
|
||||||
|
|
@ -81,15 +92,20 @@ class Lexer(collections.abc.Sequence):
|
||||||
source=self.data),
|
source=self.data),
|
||||||
value=""
|
value=""
|
||||||
)
|
)
|
||||||
|
token_kind: Tokens
|
||||||
for token_kind in list(Tokens):
|
for token_kind in list(Tokens):
|
||||||
if token_kind == Tokens.Unknown:
|
if token_kind == Tokens.Unknown:
|
||||||
continue
|
continue
|
||||||
regex: re.Pattern = token_kind.value
|
regex = cast(re.Pattern, token_kind.value)
|
||||||
match = regex.match(self.data, self.begin)
|
match = regex.match(self.data, self.begin)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
logger.trace(f"Got match: {match}")
|
logger.trace(f"Got match: {match}")
|
||||||
result = match.group(0)
|
result = match.group(0)
|
||||||
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
|
if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
|
||||||
|
if not implies(best_result.kind.is_keyword(), token_kind.is_keyword()):
|
||||||
|
logger.trace(
|
||||||
|
f"Best match is a keyword ({best_result}) and current match ({token_kind}) is not, skipping")
|
||||||
|
continue
|
||||||
loc = SourceLocation(
|
loc = SourceLocation(
|
||||||
begin=Location(line=self.line, character=self.character),
|
begin=Location(line=self.line, character=self.character),
|
||||||
end=Location(line=self.line, character=self.character + len(result))
|
end=Location(line=self.line, character=self.character + len(result))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue