meta: initial commit
This commit is contained in:
commit
3d15b6dd63
7 changed files with 435 additions and 0 deletions
0
compiler/__init__.py
Normal file
0
compiler/__init__.py
Normal file
86
compiler/__main__.py
Normal file
86
compiler/__main__.py
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .nodes import Node
|
||||||
|
from .rules import Rule, And, Or, Terminal, EvalResult
|
||||||
|
from .tokenizer import Tokenizer, Tokens, Token
|
||||||
|
|
||||||
|
# Hard-coded sample input for the demo parser below.
data = "2 * (32.9 + 1)"

# Reference grammar (documentation only — the Rule objects below must mirror it).
grammar = r"""
number = r"[0-9]+(\.[0-9]*)?"
operator = "+" | "-" | "*" | "/"
value = "(" expression ")" | number | expression
expression = value operator value
root = expression
"""

number = Rule.make_rule("number",
    Terminal(token_type=Tokens.Number)
)
operator = Rule.make_rule("operator", Or(
    Terminal(Tokens.Op_Plus), Terminal(Tokens.Op_Minus),
    Terminal(Tokens.Op_Multiply), Terminal(Tokens.Op_Divide)
))

# BUG FIX: the parenthesised alternative previously wrapped `operator`,
# contradicting the grammar above ('"(" expression ")"').  Use the
# late-bound name "expression" so Rule.prepare() can resolve the cycle.
value = Rule.make_rule("value", Or(
    And(
        Terminal(Tokens.Parens_Left), "expression", Terminal(Tokens.Parens_Right)
    ),
    number,
    "expression"
))

expression = Rule.make_rule("expression", And(
    number,
    operator,
    number
))

root = expression
# Resolve the string rule references ("expression") into Rule objects.
root.prepare()
|
def print_results(result: EvalResult) -> str:
    """Render an EvalResult tree as an indented multi-line string.

    Non-EvalResult leaves (e.g. tokens) are stringified directly.
    Depth is tracked on the function object itself (print_results._depth),
    so this function is not reentrant/thread-safe.
    """
    prefix = "\t" * print_results._depth
    message = ""
    # isinstance (was: type(result) != EvalResult) so subclasses recurse too.
    if not isinstance(result, EvalResult):
        message = f"{prefix}{str(result)}\n"
        return message

    print_results._depth += 1

    if result.name is not None:
        message += f"{prefix}{result.name}\n"

    # isinstance (was: type(result.result) == list) for list subclasses.
    if isinstance(result.result, list):
        message += prefix + "{"
        # Recurse into each child result and join them on one level.
        mylist = [print_results(r) for r in result.result]
        message += " ".join(mylist)
        message += prefix + "}"
    else:
        message += print_results(result.result) + "\n"
    print_results._depth -= 1
    return message


# Recursion-depth counter used for indentation by print_results.
print_results._depth = 0
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Tokenize a fixed expression, drop whitespace tokens, parse, and report."""
    raw_tokens = Tokenizer().tokenize("2 + 3")

    # Whitespace is irrelevant to the grammar; strip it before parsing.
    significant = [t for t in raw_tokens if t.kind != Tokens.Blank]
    print(significant)

    outcome = root.evaluate(significant)
    print(outcome)
    print(print_results(outcome))
    if outcome.errors is not None:
        raise outcome.errors


if __name__ == "__main__":
    main()
|
||||||
28
compiler/logger.py
Normal file
28
compiler/logger.py
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
import logging
|
||||||
|
import enum
|
||||||
|
|
||||||
|
|
||||||
|
class LogLevel(enum.IntEnum):
    """Severity levels, value-compatible with the stdlib ``logging`` constants."""
    Critical = logging.CRITICAL
    Error = logging.ERROR
    Warning = logging.WARNING
    Info = logging.INFO
    Debug = logging.DEBUG
|
||||||
|
|
||||||
|
|
||||||
|
def make_logger(name: str, level: LogLevel = LogLevel.Debug) -> logging.Logger:
    """Create (or fetch) a logger with a console handler at *level*.

    Loggers are process-wide singletons keyed by *name*.  BUG FIX: the
    original attached a new StreamHandler on every call, so calling this
    twice for the same name made every record print multiple times; we
    now only attach a handler when the logger has none yet.
    """
    _logger = logging.getLogger(name)
    _logger.setLevel(level)
    if not _logger.handlers:
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(level)
        # create formatter
        formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        # add ch to logger
        _logger.addHandler(ch)

    return _logger


# Shared package-wide logger used by the other compiler modules.
logger = make_logger("compiler")
|
||||||
4
compiler/nodes.py
Normal file
4
compiler/nodes.py
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
class Node:
    """Placeholder base class for AST nodes (attached to rules via make_rule's ``node``)."""
    pass
|
||||||
183
compiler/rules.py
Normal file
183
compiler/rules.py
Normal file
|
|
@ -0,0 +1,183 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from beartype.typing import Union, Dict, Optional, List, Any, Tuple
|
||||||
|
import abc
|
||||||
|
|
||||||
|
from .nodes import Node
|
||||||
|
from .source import SourceLocation
|
||||||
|
from .tokenizer import Token, Tokens
|
||||||
|
from .logger import logger
|
||||||
|
|
||||||
|
class ParsingError(Exception):
    """Raised (or carried in an EvalResult) when a rule fails to match;
    remembers where in the source the failure happened."""

    def __init__(self, location: SourceLocation, message: str = "Unknown error"):
        full_message = f"{message} at {str(location)}"
        super().__init__(full_message)
        self.location = location
|
||||||
|
|
||||||
|
@dataclass
class EvalResult:
    """Outcome of evaluating a Rule against a token stream.

    Attributes:
        result: the matched payload — a nested EvalResult, a list of them
            (sequence rules), a single Token (terminals), or None.
        errors: the ParsingError that stopped the match, or None on success.
        name:   the producing rule's name, if the rule was named.
    """
    result: EvalResult | List[EvalResult] | Token | None = None
    errors: Optional[ParsingError] = None
    name: Optional[str] = None

    def __str__(self):
        name = self.name if self.name is not None else self.__class__.__name__
        value = str(self.result)
        # isinstance (was: type == list) so list subclasses render element-wise.
        if isinstance(self.result, list):
            value = ", ".join(str(r) for r in self.result)
        return f"{name}({value})"

    def __repr__(self):
        # Deliberately fall back to object's id-based repr instead of a
        # (potentially huge, recursive) field dump.
        return super(EvalResult, self).__repr__()


# A rule reference: either a Rule instance or the name of one registered
# via Rule.make_rule (resolved later by Rule.prepare).
RuleLike = Union[str, 'Rule']
||||||
|
|
||||||
|
class Rule(abc.ABC):
    """Base class for grammar rules.

    Rules form a graph; string references (rule names) allow cycles and
    are resolved into Rule instances by prepare().
    """

    # Registry of rules published under a name via make_rule().
    _named_rules: Dict[str, Rule] = dict()
    # Shared recursion-depth counter, used only to annotate log output.
    _depth: int = 0

    def __init__(self, *sub_rules: RuleLike):
        self._prepared: bool = False
        # Raw sub-rules as given: Rule objects and/or unresolved name strings.
        self._rules: List[RuleLike] = list(sub_rules)
        # Resolved sub-rules; populated by prepare().
        self.rules: List[Rule] = []
        self.name: Optional[str] = None
        self.node: Optional[Node] = None

    @staticmethod
    def make_rule(name: str, rule: Rule, node: Optional[Node] = None) -> Rule:
        """Name *rule*, register it for by-name lookup, and return it."""
        rule.name = name
        rule.node = node
        Rule._named_rules[name] = rule
        return rule

    @abc.abstractmethod
    def evaluate(self, tokens: List[Token]) -> EvalResult:
        """Match *tokens* against this rule; failures are reported via
        EvalResult.errors rather than by raising."""
        raise NotImplementedError()

    def prepare(self):
        """Resolve string sub-rule references to Rule objects, recursively.

        Marks itself prepared *before* recursing, so cyclic grammars
        (e.g. value -> expression -> value) terminate.
        """
        if self._prepared:
            return

        for key, rule in enumerate(self._rules):
            if type(rule) == str:
                self._rules[key] = Rule._named_rules[rule]

        # NOTE: self.rules aliases (does not copy) the resolved list.
        self.rules = self._rules

        self._prepared = True
        for rule in self._rules:
            if not rule._prepared:
                rule.prepare()

    def __repr__(self):
        return "{}:{}({})".format(self.__class__.__name__,
                                  self.name if self.name is not None else "",
                                  ", ".join(rule.__class__.__name__ for rule in self.rules))
|
||||||
|
|
||||||
|
|
||||||
|
class Terminal(Rule):
    """Leaf rule: matches exactly one token of a fixed kind."""

    def __init__(self, token_type: Tokens):
        super().__init__()
        self.token_type = token_type

    def evaluate(self, tokens: List[Token]) -> EvalResult:
        assert len(tokens) > 0
        res = EvalResult(name=self.name)
        logger.debug(f"{Rule._depth}: Terminal: Evaluating terminal token with tokens: '{tokens}'")

        # A terminal consumes one token and one token only.
        if len(tokens) != 1:
            res.errors = ParsingError(tokens[0].loc, message=f"Terminal rule must have exactly one token")
            return res

        token = tokens[0]
        if token.kind != self.token_type:
            res.errors = ParsingError(
                token.loc,
                message=f"Unexpected token. Wanted {self.token_type.name}, got {token.kind.name}"
            )
            return res

        res.result = token
        logger.debug(f"{Rule._depth}: Terminal: Found terminal node: {res}")
        return res

    def __repr__(self):
        return f"{self.__class__.__name__}({self.token_type})"
|
||||||
|
|
||||||
|
class Or(Rule):
    """Ordered-choice rule: the first sub-rule that matches wins."""

    # BUG FIX: the return annotation previously declared a tuple
    # '(Optional[Exception], Dict[str, Any])' although an EvalResult is
    # returned (and the abstract base declares EvalResult).
    def evaluate(self, tokens: List[Token]) -> EvalResult:
        """Try each sub-rule in order over *tokens*; keep the first success.

        If nothing matches, the EvalResult of the last attempted sub-rule
        (carrying its errors) is returned.
        """
        # Seed with a generic error so an empty rule list still reports failure.
        result = EvalResult(errors=ParsingError(location=tokens[0].loc), name=self.name)
        rule: Rule
        for rule in self.rules:
            logger.debug(f"{Rule._depth}: Or: trying rule: {rule}")
            Rule._depth += 1
            result = rule.evaluate(tokens)
            Rule._depth -= 1
            if result.errors is None:
                logger.debug(f"{Rule._depth}: Or: Rule '{rule}' matched, result: {result}")
                break

        logger.debug(f"{Rule._depth}: Or: Finished with errors: {result.errors}")

        # Re-tag with this rule's name: the sub-rule stamped its own.
        result.name = self.name

        return result
|
||||||
|
|
||||||
|
|
||||||
|
class And(Rule):
    """Sequence rule: every sub-rule must match, in order, consuming tokens
    left to right.

    For each sub-rule the evaluator greedily grows a token window until the
    sub-rule stops matching, then hands the remaining tokens to the next
    sub-rule.
    """

    def evaluate(self, tokens: List[Token]) -> EvalResult:
        # Seed with an error; it is cleared only on the while-else path below
        # (i.e. when a sub-rule's match reaches the end of the token stream).
        result = EvalResult(errors=ParsingError(tokens[0].loc), name=self.name)
        result.result = []
        begin = 0  # start of the window for the current sub-rule
        end = 0    # window is tokens[begin:end+1]
        for i, rule in enumerate(self.rules):
            logger.debug("%s", f"{Rule._depth}: And: Trying rule '{rule}'")

            if end == len(tokens):
                logger.error("%s", f"{Rule._depth}: And: Oops, reached the end of the tokens")

            # Best (longest) match found for the current sub-rule so far.
            best_r: Optional[EvalResult] = None
            while end < len(tokens):
                tokens_ = tokens[begin:end+1]
                Rule._depth += 1
                r = rule.evaluate(tokens_)
                Rule._depth -= 1

                # No previous match, but we found one
                if best_r is None and r.errors is None:
                    logger.debug(f"{Rule._depth}: And: Rule {i + 1}/{len(self.rules)} '{rule}' matched, result: {r}")
                    best_r = r

                # No result at all. That's an error.
                elif best_r is None and r.errors is not None:
                    result.errors = r.errors
                    return result

                # We had a result, but we still matched with more tokens
                elif best_r is not None and r.errors is None:
                    logger.debug(f"{Rule._depth}: And: Rule {i + 1}/{len(self.rules)} '{rule}' **improved**, result: {r}")
                    best_r = r

                # We already have a match, and we can't improve it. Finish this rule.
                elif best_r is not None and r.errors is not None:
                    logger.debug(f"{Rule._depth}: And: Rule {i + 1}/{len(self.rules)} '{rule}' has FINAL match, result: {best_r}")
                    result.result += [best_r]
                    # Matching rule ended at 'end - 1', meaning next rule will begin at end
                    begin = end
                    break

                end += 1

            else:
                # while exhausted the tokens without breaking: this sub-rule's
                # best match extends to the end of the input.
                logger.debug(
                    f"{Rule._depth}: And: Rule {i + 1}/{len(self.rules)} '{rule}' has FINAL match, finishing rule, result: {best_r}")
                result.result += [best_r]
                result.errors = None
                # NOTE(review): result.errors is cleared only here; on the
                # break path above the seeded ParsingError appears to persist
                # even though the sub-rule matched — confirm this is intended.

        if end != len(tokens):
            logger.debug(f"{Rule._depth}: And: Didn't consume all tokens")
        return result
|
||||||
64
compiler/source.py
Normal file
64
compiler/source.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from beartype import beartype
|
||||||
|
from beartype.typing import Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
@beartype
@dataclass
class Location:
    """A single point in a source file, addressed by line and character.

    NOTE(review): indices look 0-based (the tokenizer passes raw string
    offsets and always line=0) — confirm before rendering user-facing
    line numbers.
    """
    line: int
    character: int
    # File name used only for display; "<none>" when unknown.
    file: str = "<none>"

    def __str__(self) -> str:
        return f"{self.file}:{self.line}:{self.character}"
|
||||||
|
|
||||||
|
|
||||||
|
@beartype
class SourceLocation:
    """A begin/end span in a source file, optionally carrying the source text."""

    def __init__(self, begin: Location, end: Optional[Location] = None, source: Optional[str] = None):
        self.begin = begin
        self.end = end
        # A missing end collapses the span to a single point.
        if self.end is None:
            self.end = self.begin

        self.source = source

        # Span must be well-ordered and confined to one file.
        assert (self.begin.line, self.begin.character) <= (self.end.line, self.end.character)
        assert self.begin.file == self.end.file

    def __str__(self):
        if self.begin == self.end:
            return str(self.begin)
        return f"{str(self.begin)} - {str(self.end)}"

    @property
    def source_substring(self) -> str:
        """The text covered by this span (requires self.source to be set).

        NOTE(review): the single-line case slices through end.character
        *inclusively* (+1) while the multi-line case treats end.character
        as *exclusive* — one of the two conventions looks unintended;
        confirm against how the tokenizer sets `end`.
        """
        source = self.source.splitlines(keepends=False)
        source_lines = source[self.begin.line:self.end.line + 1]
        if len(source_lines) == 1:
            source_lines[0] = source_lines[0][self.begin.character:self.end.character + 1]
        else:
            source_lines[0] = source_lines[0][self.begin.character:]
            source_lines[-1] = source_lines[-1][:self.end.character]
        return "\n".join(source_lines)

    def show_in_source(self) -> str:
        """Render the begin line with a caret/dash underline marking the span.

        Multi-line spans return just the first source line, un-underlined.
        """
        source = self.source.splitlines(keepends=False)
        source_line = source[self.begin.line]
        result = [source_line]
        if self.begin.line != self.end.line:
            return "\n".join(result)

        # '^' under begin, dashes up to end, padded out to the line length.
        line = " " * self.begin.character
        line += "^" + "-" * max(0, (self.end.character - self.begin.character - 1))
        line += " " * (len(source_line) - len(line))

        result += [line]

        return "\n".join(result)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
70
compiler/tokenizer.py
Normal file
70
compiler/tokenizer.py
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from beartype import beartype
|
||||||
|
from beartype.typing import Optional
|
||||||
|
|
||||||
|
import enum
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .logger import logger
|
||||||
|
from .source import SourceLocation, Location
|
||||||
|
|
||||||
|
@beartype
@dataclass
class Token:
    """A lexeme produced by the Tokenizer.

    Only `kind` participates in equality/hashing — `loc` and `value` are
    declared compare=False, so two tokens of the same kind compare equal
    regardless of text or position.
    """
    kind: Tokens
    # Where in the input this token was matched.
    loc: SourceLocation = field(compare=False, hash=False, default=None)
    # The matched text; None for synthetic/sentinel tokens.
    value: Optional[str] = field(compare=False, hash=False, default=None)

    def __repr__(self):
        if self.value is None:
            return super().__repr__()
        return f"{self.kind.name}({repr(self.value)})"
|
||||||
|
|
||||||
|
|
||||||
|
class Tokens(enum.Enum):
    """Token kinds; each member's value is the compiled regex that matches it.

    Declaration order matters: the tokenizer iterates members in this order
    and breaks length ties in favour of later members.
    """
    Number = re.compile(r"[0-9]+(\.?[0-9]*)")
    Op_Plus = re.compile(r"\+")
    Op_Minus = re.compile(r"-")
    Op_Multiply = re.compile(r"\*")
    Op_Divide = re.compile(r"/")
    Parens_Left = re.compile(r"\(")
    Parens_Right = re.compile(r"\)")
    Blank = re.compile(r"\s+")
    # Catch-all sentinel; the tokenizer skips it when matching and uses it
    # only to mean "no pattern matched here".
    Unknown = re.compile(r".*")
|
||||||
|
|
||||||
|
|
||||||
|
class Tokenizer:
    """Regex-based longest-match tokenizer over the Tokens enum."""

    def __init__(self):
        pass

    def tokenize(self, data: str) -> List[Token]:
        """Split *data* into Tokens by longest regex match at each position.

        On equal-length matches the Tokens member declared later wins
        (the '>=' comparison below).  Unmatchable input logs an error with
        a source caret and terminates the process.
        """
        results: List[Token] = []
        begin = 0
        while begin < len(data):
            # Sentinel: stays Unknown if no pattern matches at 'begin'.
            best_result: Token = Token(Tokens.Unknown,
                loc=SourceLocation(Location(line=0, character=begin), source=data)
            )
            for token_kind in Tokens:
                if token_kind == Tokens.Unknown:
                    continue
                match = token_kind.value.match(data, begin)
                if match is not None:
                    logger.debug(f"Got match: {match}")
                    result = match.group(0)
                    # Longest match wins; '>=' lets later-declared kinds beat ties.
                    if best_result.kind == Tokens.Unknown or len(result) >= len(best_result.value):
                        loc = SourceLocation(
                            begin=Location(line=0, character=begin),
                            end=Location(line=0, character=begin + len(result))
                        )
                        best_result = Token(token_kind, value=result, loc=loc)
                        logger.debug(f"New best match: {best_result}")

            if best_result.kind == Tokens.Unknown:
                source_hint = best_result.loc.show_in_source()
                logger.error(f"{best_result.loc}: Unknown token '{best_result.loc.source_substring}'\n{source_hint}")
                # NOTE(review): exiting the process from library code —
                # consider raising a ParsingError instead.
                exit(1)

            results += [best_result]
            begin += len(best_result.value)
        return results
|
||||||
Loading…
Add table
Add a link
Reference in a new issue