nodes+parser+semantic+ir: add variables
References + assignments
This commit is contained in:
parent
e9324f4f71
commit
223c3be819
6 changed files with 207 additions and 50 deletions
|
|
@ -12,8 +12,8 @@ from .tokenizer import Tokenizer, Tokens
|
|||
|
||||
def main():
|
||||
data = """
|
||||
2 + 3 / (8 - 1 + 3) * 1
|
||||
+ 34.2
|
||||
2 + 8 - 1 * (byte = 3 + 5)
|
||||
/ (byte = 255) + byte
|
||||
"""
|
||||
print("Source:\n", data)
|
||||
tokenizer = Tokenizer()
|
||||
|
|
@ -28,8 +28,11 @@ def main():
|
|||
ast = parser.parse()
|
||||
ast.pprint(depth=10)
|
||||
|
||||
context = semantic.Context()
|
||||
intermediate_representation = ast.intermediate_representation(context=context)
|
||||
context = semantic.Context("root")
|
||||
ast.semantic_analysis(context)
|
||||
print(context)
|
||||
|
||||
intermediate_representation = ast.intermediate_representation()
|
||||
|
||||
messages = []
|
||||
for ir_item in intermediate_representation:
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class IRValue(IRItem, abc.ABC):
|
|||
class IRMove(IRAction):
|
||||
|
||||
@beartype
|
||||
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
|
||||
def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
|
||||
super().__init__(location)
|
||||
self.dest = dest
|
||||
self.source = source
|
||||
|
|
@ -61,7 +61,11 @@ class IRImmediate(IRValue):
|
|||
return f"{self.value}"
|
||||
|
||||
|
||||
class IRRegister(IRValue):
|
||||
class IRAssignable(IRValue, metaclass=abc.ABCMeta):
    """Abstract base for IR values that can be used as the destination of an IR action.

    It adds no behaviour of its own; it exists so that actions can annotate
    their ``dest`` parameter with a type narrower than ``IRValue``.
    """
|
||||
|
||||
|
||||
class IRRegister(IRAssignable):
|
||||
register_id = 0
|
||||
|
||||
def __init__(self, location: SourceLocation):
|
||||
|
|
@ -73,10 +77,19 @@ class IRRegister(IRValue):
|
|||
return f"%r{self.id}"
|
||||
|
||||
|
||||
class IRVariable(IRAssignable):
    """Assignable IR value that designates a variable by its fully qualified identifier."""

    def __init__(self, location: SourceLocation, fq_identifier: str):
        """Store the source location (via the base class) and the variable's qualified name."""
        super().__init__(location)
        self.fq_identifier = fq_identifier

    def codegen(self) -> str:
        """Return the textual IR form: ``@`` followed by the fully qualified identifier."""
        identifier = self.fq_identifier
        return f"@{identifier}"
|
||||
|
||||
|
||||
class IRAdd(IRAction):
|
||||
|
||||
@beartype
|
||||
def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue):
|
||||
def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue):
|
||||
super().__init__(location)
|
||||
assert all(isinstance(v, IRValue) for v in values)
|
||||
|
||||
|
|
@ -94,7 +107,7 @@ class IRAdd(IRAction):
|
|||
class IRMul(IRAction):
|
||||
|
||||
@beartype
|
||||
def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue):
|
||||
def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue):
|
||||
super().__init__(location)
|
||||
assert all(isinstance(v, IRValue) for v in values)
|
||||
|
||||
|
|
@ -112,7 +125,7 @@ class IRMul(IRAction):
|
|||
class IRNegation(IRAction):
|
||||
|
||||
@beartype
|
||||
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
|
||||
def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
|
||||
super().__init__(location)
|
||||
|
||||
self.source = source
|
||||
|
|
@ -128,7 +141,7 @@ class IRNegation(IRAction):
|
|||
class IRInvert(IRAction):
|
||||
|
||||
@beartype
|
||||
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
|
||||
def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
|
||||
super().__init__(location)
|
||||
|
||||
self.source = source
|
||||
|
|
|
|||
|
|
@ -16,8 +16,11 @@ logger = Logger(__name__)
|
|||
|
||||
class Node:
|
||||
|
||||
def __init__(self):
|
||||
self.context: semantic.Context | None = None
|
||||
|
||||
@abstractmethod
|
||||
def _values(self) -> Any | list[Node]:
|
||||
def _values(self) -> list[Node | Any]:
|
||||
raise NotImplementedError(f"Please override {__name__}")
|
||||
|
||||
@functools.cache
|
||||
|
|
@ -62,13 +65,19 @@ class Node:
|
|||
return result
|
||||
|
||||
@abstractmethod
|
||||
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
|
||||
def intermediate_representation(self) -> list[ir.IRItem]:
|
||||
raise NotImplementedError(f"Please implement {self.__class__.__name__}.{__name__}")
|
||||
|
||||
def semantic_analysis(self, context: semantic.Context):
    """Analyse every child node with ``context``, then record ``context`` on this node.

    Entries returned by ``_values()`` that are not ``Node`` instances are ignored.
    """
    logger.debug(f"Doing semantic analysis in {self}")
    child_nodes = (entry for entry in self._values() if isinstance(entry, Node))
    for child in child_nodes:
        child.semantic_analysis(context)
    # The node is tagged only once all of its children have been analysed.
    self.context = context
|
||||
|
||||
@staticmethod
|
||||
def _prepare_sources_ir(result: list[ir.IRAction],
|
||||
context: semantic.Context, values: Iterable[Value]) -> list[ir.IRValue]:
|
||||
vals = [value.intermediate_representation(context) for value in values]
|
||||
def _prepare_sources_ir(result: list[ir.IRAction], values: Iterable[Value]) -> list[ir.IRValue]:
|
||||
vals = [value.intermediate_representation() for value in values]
|
||||
for value in vals:
|
||||
result += value
|
||||
|
||||
|
|
@ -77,19 +86,20 @@ class Node:
|
|||
|
||||
class Literal(Node, ABC):
|
||||
def __init__(self, location: SourceLocation, value: Any):
|
||||
super().__init__()
|
||||
self.value = value
|
||||
self.loc = location
|
||||
|
||||
def location(self) -> SourceLocation:
|
||||
return self.loc
|
||||
|
||||
def _values(self) -> Any | list[Node]:
|
||||
return self.value
|
||||
def _values(self) -> list[Node | Any]:
|
||||
return [self.value]
|
||||
|
||||
def _pprint(self, depth: int | None, indent: str, _depth: int = 0) -> list[str]:
|
||||
return [f"{indent}{repr(self)}"]
|
||||
|
||||
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
|
||||
def intermediate_representation(self) -> list[ir.IRItem]:
|
||||
dest = ir.IRRegister(location=self.location())
|
||||
immediate = ir.IRImmediate(location=self.location(), value=self.value)
|
||||
result = [ir.IRMove(location=self.location(), dest=dest, source=immediate)]
|
||||
|
|
@ -97,30 +107,31 @@ class Literal(Node, ABC):
|
|||
|
||||
|
||||
class Sum(Node):
    """AST node that adds together all of its operand values."""

    def __init__(self, *values: Value):
        super().__init__()
        self.values = values

    def _values(self) -> list[Node | Any]:
        """Return the operands as a new list."""
        return [*self.values]

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Emit the operands' IR followed by a single ``IRAdd`` into a fresh register."""
        actions: list[ir.IRAction] = []

        # The helper extends ``actions`` with each operand's IR and hands back
        # the IR values that the addition has to combine.
        operands = Node._prepare_sources_ir(result=actions, values=self.values)

        target = ir.IRRegister(location=self.location())
        actions.append(ir.IRAdd(self.location(), target, *operands))
        return actions
|
||||
|
||||
|
||||
class Sub(Node):
|
||||
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
|
||||
def intermediate_representation(self) -> list[ir.IRItem]:
|
||||
result: list[ir.IRAction] = []
|
||||
|
||||
first_val = self.first_value.intermediate_representation(context)
|
||||
first_val = self.first_value.intermediate_representation()
|
||||
result += first_val
|
||||
|
||||
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
|
||||
values_results = Node._prepare_sources_ir(result=result, values=self.values)
|
||||
|
||||
for i, value_result in enumerate(values_results):
|
||||
d = ir.IRRegister(location=self.location())
|
||||
|
|
@ -134,37 +145,39 @@ class Sub(Node):
|
|||
return result
|
||||
|
||||
def __init__(self, first_value: Value, *values: Value):
|
||||
super().__init__()
|
||||
self.first_value = first_value
|
||||
self.values = values
|
||||
|
||||
def _values(self) -> Any | list[Node]:
|
||||
def _values(self) -> list[Node | Any]:
|
||||
return [self.first_value] + list(self.values)
|
||||
|
||||
|
||||
class Product(Node):
    """AST node that multiplies together all of its operand values."""

    def __init__(self, *values: Value):
        super().__init__()
        self.values = values

    def _values(self) -> list[Node | Any]:
        """Return the operands as a new list."""
        return [*self.values]

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Emit the operands' IR followed by a single ``IRMul`` into a fresh register."""
        actions: list[ir.IRAction] = []

        # The helper extends ``actions`` with each operand's IR and hands back
        # the IR values that the multiplication has to combine.
        operands = Node._prepare_sources_ir(result=actions, values=self.values)

        target = ir.IRRegister(location=self.location())
        actions.append(ir.IRMul(self.location(), target, *operands))
        return actions
|
||||
|
||||
|
||||
class Division(Node):
|
||||
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
|
||||
def intermediate_representation(self) -> list[ir.IRItem]:
|
||||
result: list[ir.IRAction] = []
|
||||
|
||||
first_val = self.first_value.intermediate_representation(context)
|
||||
first_val = self.first_value.intermediate_representation()
|
||||
result += first_val
|
||||
|
||||
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
|
||||
values_results = Node._prepare_sources_ir(result=result, values=self.values)
|
||||
|
||||
for i, value_result in enumerate(values_results):
|
||||
d = ir.IRRegister(location=self.location())
|
||||
|
|
@ -178,10 +191,11 @@ class Division(Node):
|
|||
return result
|
||||
|
||||
def __init__(self, first_value: Value, *values: Value):
|
||||
super().__init__()
|
||||
self.first_value = first_value
|
||||
self.values = values
|
||||
|
||||
def _values(self) -> Any | list[Node]:
|
||||
def _values(self) -> list[Node | Any]:
|
||||
return [self.first_value] + list(self.values)
|
||||
|
||||
|
||||
|
|
@ -202,35 +216,76 @@ class Integer(Literal):
|
|||
|
||||
|
||||
class Expression(Node):
    """Thin wrapper around a single child node; every operation is delegated to it."""

    def __init__(self, node: Node):
        super().__init__()
        self.node = node

    def _values(self) -> list[Node | Any]:
        """Return the wrapped node as a one-element list."""
        return [self.node]

    def semantic_analysis(self, context: semantic.Context):
        """Analyse the wrapped node and record ``context`` on this node as well.

        The wrapped node's result is returned unchanged.
        """
        outcome = self.node.semantic_analysis(context)
        # Keep the wrapper consistent with Node.semantic_analysis, which stores
        # the analysis context on every node it visits.
        self.context = context
        return outcome

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Return the intermediate representation of the wrapped node."""
        return self.node.intermediate_representation()

    def location(self) -> SourceLocation:
        """Return the source location of the wrapped node."""
        return self.node.location()
|
||||
|
||||
|
||||
class Identifier(Literal):
    """Literal node whose value is the identifier's name."""

    def __init__(self, location: SourceLocation, name: str):
        """Store ``name`` as the literal value at ``location``."""
        super().__init__(location=location, value=name)
        # Annotation only: narrows the inherited ``value`` attribute to ``str``.
        self.value: str
|
||||
|
||||
|
||||
class Variable(Literal):
    """Reference to a variable by name.

    The literal ``value`` starts as ``None`` and is replaced by the variable
    found in the context during semantic analysis.
    """

    def __init__(self, location: SourceLocation, identifier: Identifier):
        # No value is known yet; semantic_analysis() links the reference later.
        super().__init__(location, None)
        self.identifier = identifier

    def semantic_analysis(self, context: semantic.Context):
        """Resolve the referenced variable in ``context``.

        Raises:
            SemanticAnalysisError: if ``context.get_variable`` finds no variable
                with the identifier's name.
        """
        name = self.identifier.value
        variable = context.get_variable(name)
        if variable is None:
            raise SemanticAnalysisError(location=self.location(), message=f"Unknown variable '{name}'")

        self.value = variable
        # Record the context as Node.semantic_analysis does; this override does
        # not call the base implementation.
        self.context = context
        logger.debug(f"Linked variable reference to var {variable}")

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Return a single ``IRVariable`` naming the resolved variable."""
        return [ir.IRVariable(location=self.location(), fq_identifier=self.value.fully_qualified_name())]
|
||||
|
||||
|
||||
class Assignment(Node):
    """AST node that assigns the result of a value expression to a named variable."""

    def __init__(self, identifier: Identifier, value: Value):
        super().__init__()
        self.identifier = identifier
        self.value = value
        # Filled in by semantic_analysis() with the variable registered in the context.
        self.variable: semantic.Variable | None = None

    def _values(self) -> list[Node | Any]:
        """Return the identifier followed by the assigned value."""
        return [self.identifier, self.value]

    def semantic_analysis(self, context: semantic.Context):
        """Analyse the children first, then register the assignment in ``context``."""
        super().semantic_analysis(context)
        self.variable = context.set_variable(self.identifier.value, value=self.value)
        logger.debug(f"Added variable {self.variable} to context {context.fully_qualified_name()}")

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Emit the value's IR followed by an ``IRMove`` into the variable.

        Requires semantic_analysis() to have run, so that ``self.variable`` is set.
        """
        assert self.variable is not None

        value_ir = self.value.intermediate_representation()
        items: list[ir.IRItem] = [*value_ir]

        target = ir.IRVariable(location=self.location(), fq_identifier=self.variable.fully_qualified_name())
        # The source of the move is the destination of the last item emitted for the value.
        move = ir.IRMove(location=self.location(), dest=target.destination(), source=value_ir[-1].destination())
        items.append(move)

        return items
|
||||
|
||||
|
||||
Number = Float | Integer
|
||||
Value = BinaryOperation | Number
|
||||
Value = BinaryOperation | Number | Variable
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ from beartype.typing import List, Dict, Callable
|
|||
|
||||
from .errors import CompilationError, UnexpectedTokenError
|
||||
from .logger import Logger
|
||||
from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression
|
||||
from .source import SourceLocation
|
||||
from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression, Identifier, Assignment, \
|
||||
Variable
|
||||
from .tokenizer import Tokens, Token
|
||||
|
||||
logger = Logger(__name__)
|
||||
|
|
@ -62,6 +62,18 @@ class Parser:
|
|||
elif mandatory:
|
||||
raise UnexpectedTokenError(self.token, "integer or float")
|
||||
|
||||
def identifier(self, mandatory: bool = False) -> Identifier | None:
    """Parse an identifier token into an ``Identifier`` node.

    Returns the node when ``self.accept(Tokens.Identifier)`` yields a token,
    otherwise ``None``.

    Raises:
        UnexpectedTokenError: if no identifier token is accepted and
            ``mandatory`` is true.
    """
    ident = self.accept(Tokens.Identifier)
    if ident:
        return Identifier(location=ident.loc, name=str(ident.value))
    if mandatory:
        raise UnexpectedTokenError(self.token, "identifier")
    return None
|
||||
|
||||
def variable(self, mandatory: bool = False) -> Variable | None:
    """Parse a variable reference.

    Returns a ``Variable`` node wrapping the parsed identifier, or ``None``
    when no identifier is present.

    Raises:
        UnexpectedTokenError: if no identifier is present and ``mandatory``
            is true.
    """
    # The identifier is always parsed as optional so that the error raised
    # here carries the "variable identifier" message.
    ident = self.identifier(mandatory=False)
    if ident:
        return Variable(location=ident.location(), identifier=ident)
    if mandatory:
        raise UnexpectedTokenError(self.token, "variable identifier")
    return None
|
||||
|
||||
def binary_op(self, operand_func: Callable[[], Value], operators: Dict[Tokens, Value]):
|
||||
operand = operand_func()
|
||||
|
||||
|
|
@ -80,8 +92,10 @@ class Parser:
|
|||
return v
|
||||
elif num := self.number():
|
||||
return num
|
||||
elif variable := self.variable():
|
||||
return variable
|
||||
else:
|
||||
raise UnexpectedTokenError(self.token, "parenthesized expression or number")
|
||||
raise UnexpectedTokenError(self.token, "parenthesized expression, number or variable")
|
||||
|
||||
def term(self) -> Value:
|
||||
return self.binary_op(self.factor, operators={
|
||||
|
|
@ -95,9 +109,19 @@ class Parser:
|
|||
Tokens.Op_Minus: Sub,
|
||||
})
|
||||
|
||||
def assignment(self, mandatory: bool = False) -> Assignment | None:
    """Parse ``identifier = expression`` into an ``Assignment`` node.

    Returns ``None`` when no identifier is present and ``mandatory`` is false.

    Raises:
        UnexpectedTokenError: if ``mandatory`` is true and no identifier is
            present.
    """
    ident = self.identifier(mandatory)
    if ident:
        self.expect(Tokens.Equal)
        expr = self.expression()
        return Assignment(ident, expr)
    if mandatory:
        # Normally identifier(mandatory=True) has already raised; kept as a safeguard.
        raise UnexpectedTokenError(self.token, "assignment")
    return None
|
||||
|
||||
def expression(self) -> Value:
    """Parse an expression and wrap it in an ``Expression`` node.

    An assignment is parsed when the next token is an identifier, a
    summation otherwise.
    """
    # NOTE(review): every expression that starts with an identifier is routed
    # to assignment(), which then expects '='. A plain reference at this
    # position (e.g. "x + 1") looks unsupported — confirm.
    inner = self.assignment() if self.peek(Tokens.Identifier) else self.summation()
    return Expression(inner)
|
||||
|
||||
def root(self) -> Node:
|
||||
return self.expression()
|
||||
|
|
|
|||
|
|
@ -1,9 +1,69 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from . import nodes
|
||||
from .logger import Logger
|
||||
|
||||
logger = Logger(__name__)
|
||||
|
||||
|
||||
class Variable:
    """A named variable owned by a context, together with the values assigned to it."""

    def __init__(self, context: Context, name: str, value: nodes.Value | None = None):
        """Create the variable.

        Args:
            context: the context that owns the variable.
            name: the variable's name inside that context.
            value: optional first definition; when omitted the variable starts
                with no definitions.
        """
        self.context = context
        self.name = name
        # Do not store a placeholder ``None`` entry when no initial value is given.
        self.definitions: list[nodes.Value] = [] if value is None else [value]

    def fully_qualified_name(self) -> str:
        """Return ``<context fully qualified name>.<variable name>``."""
        return f"{self.context.fully_qualified_name()}.{self.name}"

    def __repr__(self):
        definitions = ", ".join(repr(d) for d in self.definitions)
        return f"{self.__class__.__name__}({self.name}) [definitions: {definitions}]"
|
||||
|
||||
|
||||
class Context:
    """A named scope that maps variable names to variables and may have a parent scope."""

    # Class-wide counter; its current value is baked into each new context's
    # name and it is incremented on every construction.
    _id_sequence = 0

    def __init__(self, name: str | None = None, parent: Context | None = None):
        """Create a context.

        Args:
            name: optional label; the context is named ``<name>_<sequence>``,
                or just ``<sequence>`` when no label is given.
            parent: enclosing context, or ``None`` for a top-level context.
        """
        self.parent = parent
        self.variables: dict[str, Variable] = {}
        sequence = Context._id_sequence
        self.name = str(sequence) if name is None else f"{name}_{sequence}"
        Context._id_sequence += 1

    def fully_qualified_name(self) -> str:
        """Return this context's name prefixed by its ancestors' names, joined with ``::``."""
        if self.parent is None:
            return str(self.name)
        return f"{self.parent.fully_qualified_name()}::{self.name}"

    def get_variable(self, name: str) -> Variable | None:
        """Look ``name`` up in this context, then in its ancestors.

        Returns ``None`` when no context in the chain defines the variable.
        """
        if name in self.variables:
            return self.variables[name]
        if self.parent is None:
            return None
        return self.parent.get_variable(name)

    def set_variable(self, name: str, value: nodes.Value) -> Variable:
        """Record ``value`` as a definition of ``name`` in this context.

        An existing variable of this context gets ``value`` appended to its
        definitions; otherwise a new variable is created and registered.
        Ancestor contexts are not consulted.
        """
        if name in self.variables:
            variable = self.variables[name]
            variable.definitions.append(value)
        else:
            variable = Variable(self, name, value)
            self.variables[name] = variable
        return variable

    def __repr__(self) -> str:
        lines = [f"{self.__class__.__name__}(id={self.name})"]
        if self.parent is not None:
            lines.append(f"\tParent ID: {self.parent.name}")
        if self.variables:
            lines.append(f"\tVariables ({len(self.variables)}):")
            for key, variable in self.variables.items():
                definitions = ", ".join(repr(d) for d in variable.definitions)
                lines.append(f"\t\t- {repr(key)} : {definitions}")
        return "\n".join(lines)
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ class Tokens(enum.Enum):
|
|||
Op_Divide = re.compile(r"/")
|
||||
Parens_Left = re.compile(r"\(")
|
||||
Parens_Right = re.compile(r"\)")
|
||||
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
|
||||
Equal = re.compile(r"=")
|
||||
Newline = re.compile(r"\n", flags=re.MULTILINE)
|
||||
EOF = re.compile(r"\Z")
|
||||
Blank = re.compile(r"[ \t]+")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue