meta: initial commit
This commit is contained in:
commit
3d15b6dd63
7 changed files with 435 additions and 0 deletions
70
compiler/tokenizer.py
Normal file
70
compiler/tokenizer.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from beartype import beartype
|
||||
from beartype.typing import Optional
|
||||
|
||||
import enum
|
||||
import re
|
||||
|
||||
from .logger import logger
|
||||
from .source import SourceLocation, Location
|
||||
|
||||
@beartype
@dataclass
class Token:
    """A single lexical token produced by the Tokenizer.

    Equality and hashing consider only ``kind``: ``loc`` and ``value``
    are excluded via ``field(compare=False, hash=False)``, so two tokens
    of the same kind compare equal regardless of where or what they
    matched.
    """

    # The token's category (see the Tokens enum defined below;
    # resolvable as a forward reference thanks to
    # `from __future__ import annotations`).
    kind: Tokens
    # Where in the source this token was matched; None when absent.
    # Annotated Optional because the default is None — the previous bare
    # `SourceLocation` annotation made @beartype reject any Token
    # constructed without an explicit loc.
    loc: Optional[SourceLocation] = field(compare=False, hash=False, default=None)
    # The exact matched text; None when the token carries no lexeme.
    value: Optional[str] = field(compare=False, hash=False, default=None)

    def __repr__(self):
        """Render as ``Kind('text')`` when a lexeme is present.

        Falls back to the default object repr for value-less tokens.
        """
        if self.value is None:
            return super().__repr__()
        return f"{self.kind.name}({repr(self.value)})"
|
||||
|
||||
|
||||
class Tokens(enum.Enum):
    """The token categories recognised by the Tokenizer.

    Each member's value is a pre-compiled regular expression. Declaration
    order matters: the tokenizer iterates members in this order and breaks
    length ties in favour of the later member, so do not reorder.
    """

    # Integer or decimal literal (also accepts a trailing dot, e.g. "1.").
    Number = re.compile(r"[0-9]+(\.?[0-9]*)")
    # Arithmetic operators.
    Op_Plus = re.compile(r"\+")
    Op_Minus = re.compile(r"-")
    Op_Multiply = re.compile(r"\*")
    Op_Divide = re.compile(r"/")
    # Grouping parentheses.
    Parens_Left = re.compile(r"\(")
    Parens_Right = re.compile(r"\)")
    # One or more whitespace characters.
    Blank = re.compile(r"\s+")
    # Catch-all sentinel; the tokenizer skips it during matching and uses
    # it only to flag positions where nothing else matched.
    Unknown = re.compile(r".*")
|
||||
|
||||
|
||||
class Tokenizer:
    """Longest-match regex tokenizer driven by the Tokens enum."""

    def __init__(self):
        # Stateless: each tokenize() call works purely on its argument.
        pass

    def tokenize(self, data: str) -> list[Token]:
        """Split ``data`` into a list of Tokens.

        Scans left to right. At each position every pattern in Tokens
        except ``Unknown`` is tried anchored at that position; the
        longest match wins, with ties broken in favour of the member
        declared later in the enum. An empty input yields an empty list.

        If no pattern matches, the position is reported via the logger
        with a source hint and the process terminates with exit status 1
        (SystemExit).

        NOTE: the fixed return annotation uses the builtin ``list`` —
        the previous ``List`` was never imported and only survived as a
        lazy annotation under ``from __future__ import annotations``.
        """
        results: list[Token] = []
        begin = 0
        while begin < len(data):
            # Seed with Unknown so a position nothing matched is
            # detectable after the pattern loop.
            best_result: Token = Token(Tokens.Unknown,
                loc=SourceLocation(Location(line=0, character=begin), source=data)
            )
            for token_kind in Tokens:
                if token_kind == Tokens.Unknown:
                    continue
                match = token_kind.value.match(data, begin)
                if match is not None:
                    logger.debug(f"Got match: {match}")
                    result = match.group(0)
                    # `>=` deliberately lets later enum members win ties.
                    # The Unknown guard also protects len(best_result.value)
                    # from the seed token's value being None.
                    if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
                        loc = SourceLocation(
                            begin=Location(line=0, character=begin),
                            end=Location(line=0, character=begin + len(result))
                        )
                        best_result = Token(token_kind, value=result, loc=loc)
                        logger.debug(f"New best match: {best_result}")

            if best_result.kind == Tokens.Unknown:
                source_hint = best_result.loc.show_in_source()
                logger.error(f"{best_result.loc}: Unknown token '{best_result.loc.source_substring}'\n{source_hint}")
                # SystemExit is the builtin equivalent of the site-module
                # helper exit(); same status code, no dependency on `site`
                # having been loaded.
                raise SystemExit(1)

            results.append(best_result)
            # Every non-Unknown pattern needs at least one character, so
            # this always advances and the loop terminates.
            begin += len(best_result.value)
        return results
|
||||
Loading…
Add table
Add a link
Reference in a new issue