meta: initial commit

This commit is contained in:
Antoine Viallon 2023-04-20 21:52:13 +02:00
commit 3d15b6dd63
Signed by: aviallon
GPG key ID: D126B13AB555E16F
7 changed files with 435 additions and 0 deletions

70
compiler/tokenizer.py Normal file
View file

@ -0,0 +1,70 @@
from __future__ import annotations
from dataclasses import dataclass, field
from beartype import beartype
from beartype.typing import Optional
import enum
import re
from .logger import logger
from .source import SourceLocation, Location
@beartype
@dataclass
class Token:
    """A single lexical token.

    Only ``kind`` takes part in equality comparisons: ``loc`` and ``value``
    are declared with ``compare=False``, so two tokens of the same kind
    compare equal regardless of where they were found or what text they hold.
    """

    # Which member of ``Tokens`` this token is.
    kind: Tokens
    # Where the token was found; optional because the default is ``None``.
    loc: Optional[SourceLocation] = field(compare=False, hash=False, default=None)
    # The matched text, or ``None`` when the token carries no text.
    value: Optional[str] = field(compare=False, hash=False, default=None)

    def __repr__(self) -> str:
        """Return ``Kind`` for a value-less token, ``Kind('text')`` otherwise."""
        if self.value is None:
            # Defining __repr__ here stops @dataclass from generating one, so
            # super().__repr__() would be object.__repr__ (an opaque
            # "<Token object at 0x...>"). Show the kind instead.
            return self.kind.name
        return f"{self.kind.name}({self.value!r})"
class Tokens(enum.Enum):
    """Every token kind the tokenizer knows, keyed to its matching pattern.

    Each member's value is a compiled regular expression. Declaration order
    is the iteration order of the enum, so it must not be shuffled casually.
    """

    # --- literals ---------------------------------------------------------
    # One or more digits, optionally followed by a dot and further digits
    # (a trailing dot such as "1." is accepted by this pattern).
    Number = re.compile(r"[0-9]+(\.?[0-9]*)")

    # --- arithmetic operators ---------------------------------------------
    Op_Plus = re.compile(r"\+")
    Op_Minus = re.compile(r"-")
    Op_Multiply = re.compile(r"\*")
    Op_Divide = re.compile(r"/")

    # --- grouping ---------------------------------------------------------
    Parens_Left = re.compile(r"\(")
    Parens_Right = re.compile(r"\)")

    # --- whitespace -------------------------------------------------------
    Blank = re.compile(r"\s+")

    # --- fallback ---------------------------------------------------------
    # Catch-all kind; its pattern matches any run of non-newline characters.
    Unknown = re.compile(r".*")
class Tokenizer:
    """Split source text into a flat list of ``Token`` objects.

    Scanning is greedy: at every position each pattern in ``Tokens`` (except
    ``Tokens.Unknown``) is tried, and the longest match wins. When two kinds
    match the same length, the one declared later in ``Tokens`` wins.
    """

    def __init__(self):
        pass

    def tokenize(self, data: str) -> list[Token]:
        """Tokenize ``data`` from left to right.

        Args:
            data: The text to split into tokens.

        Returns:
            The matched tokens in source order. Every matched kind is kept,
            including ``Tokens.Blank``.

        Raises:
            SystemExit: With exit code 1, after logging an error, when no
                pattern matches at the current position.
        """
        results: list[Token] = []
        begin = 0
        while begin < len(data):
            # Placeholder that survives only if nothing matches at ``begin``.
            best_result: Token = Token(
                Tokens.Unknown,
                loc=SourceLocation(Location(line=0, character=begin), source=data),
            )
            for token_kind in Tokens:
                if token_kind == Tokens.Unknown:
                    continue
                match = token_kind.value.match(data, begin)
                if match is None:
                    continue
                logger.debug(f"Got match: {match}")
                result = match.group(0)
                if not result:
                    # A zero-length match would never advance ``begin`` and
                    # the outer loop would spin forever; treat it as no match.
                    continue
                if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
                    # NOTE(review): unlike the Unknown placeholder above, this
                    # location is built without ``source=data`` - confirm
                    # whether that is intended.
                    loc = SourceLocation(
                        begin=Location(line=0, character=begin),
                        end=Location(line=0, character=begin + len(result)),
                    )
                    best_result = Token(token_kind, value=result, loc=loc)
                    logger.debug(f"New best match: {best_result}")

            if best_result.kind == Tokens.Unknown:
                source_hint = best_result.loc.show_in_source()
                logger.error(f"{best_result.loc}: Unknown token '{best_result.loc.source_substring}'\n{source_hint}")
                # The ``exit`` builtin is injected by the ``site`` module and
                # may be missing (e.g. ``python -S``); raise the exception it
                # would have raised instead.
                raise SystemExit(1)

            results.append(best_result)
            begin += len(best_result.value)
        return results