nodes+parser+semantic+ir: add variables

References + assignments
This commit is contained in:
Antoine Viallon 2023-05-09 01:54:47 +02:00
parent e9324f4f71
commit 223c3be819
Signed by: aviallon
GPG key ID: D126B13AB555E16F
6 changed files with 207 additions and 50 deletions

View file

@ -12,8 +12,8 @@ from .tokenizer import Tokenizer, Tokens
def main():
data = """
2 + 3 / (8 - 1 + 3) * 1
+ 34.2
2 + 8 - 1 * (byte = 3 + 5)
/ (byte = 255) + byte
"""
print("Source:\n", data)
tokenizer = Tokenizer()
@ -28,8 +28,11 @@ def main():
ast = parser.parse()
ast.pprint(depth=10)
context = semantic.Context()
intermediate_representation = ast.intermediate_representation(context=context)
context = semantic.Context("root")
ast.semantic_analysis(context)
print(context)
intermediate_representation = ast.intermediate_representation()
messages = []
for ir_item in intermediate_representation:

View file

@ -39,7 +39,7 @@ class IRValue(IRItem, abc.ABC):
class IRMove(IRAction):
@beartype
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
super().__init__(location)
self.dest = dest
self.source = source
@ -61,7 +61,11 @@ class IRImmediate(IRValue):
return f"{self.value}"
class IRRegister(IRValue):
class IRAssignable(IRValue, metaclass=abc.ABCMeta):
    """Base class for IR values that IR actions accept as their ``dest``.

    It adds no behaviour of its own; ``IRRegister`` and ``IRVariable``
    derive from it.
    """
class IRRegister(IRAssignable):
register_id = 0
def __init__(self, location: SourceLocation):
@ -73,10 +77,19 @@ class IRRegister(IRValue):
return f"%r{self.id}"
class IRVariable(IRAssignable):
    """Assignable IR value that names a variable by its fully qualified identifier."""

    def __init__(self, location: SourceLocation, fq_identifier: str):
        """Store the source location (via the base class) and the identifier."""
        super().__init__(location)
        self.fq_identifier = fq_identifier

    def codegen(self) -> str:
        """Return the textual form of this operand: ``@`` followed by the identifier."""
        identifier = self.fq_identifier
        return f"@{identifier}"
class IRAdd(IRAction):
@beartype
def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue):
def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue):
super().__init__(location)
assert all(isinstance(v, IRValue) for v in values)
@ -94,7 +107,7 @@ class IRAdd(IRAction):
class IRMul(IRAction):
@beartype
def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue):
def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue):
super().__init__(location)
assert all(isinstance(v, IRValue) for v in values)
@ -112,7 +125,7 @@ class IRMul(IRAction):
class IRNegation(IRAction):
@beartype
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
super().__init__(location)
self.source = source
@ -128,7 +141,7 @@ class IRNegation(IRAction):
class IRInvert(IRAction):
@beartype
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
super().__init__(location)
self.source = source

View file

@ -16,8 +16,11 @@ logger = Logger(__name__)
class Node:
def __init__(self):
self.context: semantic.Context | None = None
@abstractmethod
def _values(self) -> Any | list[Node]:
def _values(self) -> list[Node | Any]:
raise NotImplementedError(f"Please override {__name__}")
@functools.cache
@ -62,13 +65,19 @@ class Node:
return result
@abstractmethod
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
def intermediate_representation(self) -> list[ir.IRItem]:
raise NotImplementedError(f"Please implement {self.__class__.__name__}.{__name__}")
def semantic_analysis(self, context: semantic.Context):
logger.debug(f"Doing semantic analysis in {self}")
for value in self._values():
if isinstance(value, Node):
value.semantic_analysis(context)
self.context = context
@staticmethod
def _prepare_sources_ir(result: list[ir.IRAction],
context: semantic.Context, values: Iterable[Value]) -> list[ir.IRValue]:
vals = [value.intermediate_representation(context) for value in values]
def _prepare_sources_ir(result: list[ir.IRAction], values: Iterable[Value]) -> list[ir.IRValue]:
vals = [value.intermediate_representation() for value in values]
for value in vals:
result += value
@ -77,19 +86,20 @@ class Node:
class Literal(Node, ABC):
def __init__(self, location: SourceLocation, value: Any):
super().__init__()
self.value = value
self.loc = location
def location(self) -> SourceLocation:
return self.loc
def _values(self) -> Any | list[Node]:
return self.value
def _values(self) -> list[Node | Any]:
return [self.value]
def _pprint(self, depth: int | None, indent: str, _depth: int = 0) -> list[str]:
return [f"{indent}{repr(self)}"]
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
def intermediate_representation(self) -> list[ir.IRItem]:
dest = ir.IRRegister(location=self.location())
immediate = ir.IRImmediate(location=self.location(), value=self.value)
result = [ir.IRMove(location=self.location(), dest=dest, source=immediate)]
@ -97,30 +107,31 @@ class Literal(Node, ABC):
class Sum(Node):
    """Node holding the operands of an addition."""

    def __init__(self, *values: Value):
        super().__init__()
        self.values = values

    def _values(self) -> list[Node | Any]:
        """Return the operands as a new list."""
        return list(self.values)

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Emit the operands' IR followed by one ``IRAdd`` into a new register."""
        actions: list[ir.IRAction] = []
        # The helper appends every operand's IR to `actions` and hands back
        # the values that the addition consumes.
        operands = Node._prepare_sources_ir(result=actions, values=self.values)
        # The destination register is created after the operands were lowered.
        target = ir.IRRegister(location=self.location())
        actions.append(ir.IRAdd(self.location(), target, *operands))
        return actions
class Sub(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
def intermediate_representation(self) -> list[ir.IRItem]:
result: list[ir.IRAction] = []
first_val = self.first_value.intermediate_representation(context)
first_val = self.first_value.intermediate_representation()
result += first_val
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
values_results = Node._prepare_sources_ir(result=result, values=self.values)
for i, value_result in enumerate(values_results):
d = ir.IRRegister(location=self.location())
@ -134,37 +145,39 @@ class Sub(Node):
return result
def __init__(self, first_value: Value, *values: Value):
super().__init__()
self.first_value = first_value
self.values = values
def _values(self) -> Any | list[Node]:
def _values(self) -> list[Node | Any]:
return [self.first_value] + list(self.values)
class Product(Node):
    """Node holding the operands of a multiplication."""

    def __init__(self, *values: Value):
        super().__init__()
        self.values = values

    def _values(self) -> list[Node | Any]:
        """Return the operands as a new list."""
        return list(self.values)

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Emit the operands' IR followed by one ``IRMul`` into a new register."""
        actions: list[ir.IRAction] = []
        # The helper appends every operand's IR to `actions` and hands back
        # the values that the multiplication consumes.
        operands = Node._prepare_sources_ir(result=actions, values=self.values)
        # The destination register is created after the operands were lowered.
        target = ir.IRRegister(location=self.location())
        actions.append(ir.IRMul(self.location(), target, *operands))
        return actions
class Division(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
def intermediate_representation(self) -> list[ir.IRItem]:
result: list[ir.IRAction] = []
first_val = self.first_value.intermediate_representation(context)
first_val = self.first_value.intermediate_representation()
result += first_val
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
values_results = Node._prepare_sources_ir(result=result, values=self.values)
for i, value_result in enumerate(values_results):
d = ir.IRRegister(location=self.location())
@ -178,10 +191,11 @@ class Division(Node):
return result
def __init__(self, first_value: Value, *values: Value):
super().__init__()
self.first_value = first_value
self.values = values
def _values(self) -> Any | list[Node]:
def _values(self) -> list[Node | Any]:
return [self.first_value] + list(self.values)
@ -202,35 +216,76 @@ class Integer(Literal):
class Expression(Node):
    """Wrapper node that forwards its work to the single node it wraps."""

    def __init__(self, node: Node):
        super().__init__()
        self.node = node

    def _values(self) -> list[Node | Any]:
        """Return the wrapped node as a one-element list."""
        return [self.node]

    def semantic_analysis(self, context: semantic.Context):
        """Analyse the wrapped node, then remember the context on this node.

        Returns whatever the wrapped node's ``semantic_analysis`` returns.
        """
        outcome = self.node.semantic_analysis(context)
        # Node.semantic_analysis records the context on every node it visits;
        # this override used to skip that, leaving self.context at None.
        self.context = context
        return outcome

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Return the IR of the wrapped node unchanged."""
        return self.node.intermediate_representation()

    def location(self) -> SourceLocation:
        """Return the source location of the wrapped node."""
        return self.node.location()
class Identifier(Literal):
    """Literal whose value is the identifier's name."""

    def __init__(self, location: SourceLocation, name: str):
        super().__init__(location=location, value=name)
        # Annotation only: narrows the inherited attribute to str, assigns nothing.
        self.value: str
class Variable(Literal):
    """Reference to a variable; resolved against a context during semantic analysis."""

    def __init__(self, location: SourceLocation, identifier: Identifier):
        # The value stays None until semantic_analysis links the reference.
        super().__init__(location, None)
        self.identifier = identifier

    def semantic_analysis(self, context: semantic.Context):
        """Look the identifier up in ``context`` and store the result in ``self.value``.

        Raises:
            SemanticAnalysisError: if ``context.get_variable`` returns None.
        """
        name = self.identifier.value
        variable = context.get_variable(name)
        if variable is None:
            raise SemanticAnalysisError(location=self.location(), message=f"Unknown variable '{name}'")
        self.value = variable
        # Node.semantic_analysis records the context on the node; this
        # override used to skip that, leaving self.context at None.
        self.context = context
        logger.debug(f"Linked variable reference to var {variable}")

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Return a one-element list holding the ``IRVariable`` for the linked variable."""
        fq_name = self.value.fully_qualified_name()
        return [ir.IRVariable(location=self.location(), fq_identifier=fq_name)]
class Assignment(Node):
    """Node binding the result of a value node to an identifier."""

    def __init__(self, identifier: Identifier, value: Value):
        super().__init__()
        self.identifier = identifier
        self.value = value
        # Filled in by semantic_analysis.
        self.variable: semantic.Variable | None = None

    def _values(self) -> list[Node | Any]:
        """Return the identifier followed by the assigned value."""
        return [self.identifier, self.value]

    def semantic_analysis(self, context: semantic.Context):
        """Run the base analysis, then register this definition in ``context``."""
        super(Assignment, self).semantic_analysis(context)
        self.variable = context.set_variable(self.identifier.value, value=self.value)
        logger.debug(f"Added variable {self.variable} to context {context.fully_qualified_name()}")

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Emit the value's IR followed by an ``IRMove`` into the variable.

        Requires ``semantic_analysis`` to have set ``self.variable``.
        """
        assert self.variable is not None

        value_ir = self.value.intermediate_representation()
        target = ir.IRVariable(location=self.location(), fq_identifier=self.variable.fully_qualified_name())
        # The move reads from the destination of the last item of the value's IR.
        move = ir.IRMove(location=self.location(), dest=target.destination(), source=value_ir[-1].destination())
        return [*value_ir, move]
Number = Float | Integer
Value = BinaryOperation | Number
Value = BinaryOperation | Number | Variable

View file

@ -4,8 +4,8 @@ from beartype.typing import List, Dict, Callable
from .errors import CompilationError, UnexpectedTokenError
from .logger import Logger
from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression
from .source import SourceLocation
from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression, Identifier, Assignment, \
Variable
from .tokenizer import Tokens, Token
logger = Logger(__name__)
@ -62,6 +62,18 @@ class Parser:
elif mandatory:
raise UnexpectedTokenError(self.token, "integer or float")
def identifier(self, mandatory: bool = False) -> Identifier | None:
    """Parse an identifier token into an ``Identifier`` node.

    Returns None when no identifier token is accepted and ``mandatory`` is false.

    Raises:
        UnexpectedTokenError: if no identifier token is accepted and
            ``mandatory`` is true.
    """
    token = self.accept(Tokens.Identifier)
    if token:
        return Identifier(location=token.loc, name=str(token.value))
    if mandatory:
        raise UnexpectedTokenError(self.token, "identifier")
    return None
def variable(self, mandatory: bool = False) -> Variable | None:
    """Parse an identifier and wrap it in a ``Variable`` reference node.

    Returns None when no identifier is found and ``mandatory`` is false.

    Raises:
        UnexpectedTokenError: if no identifier is found and ``mandatory`` is true.
    """
    # Always ask for an optional identifier so the error raised below names
    # the construct this method was asked to parse.
    ident = self.identifier(mandatory=False)
    if ident:
        return Variable(location=ident.location(), identifier=ident)
    if mandatory:
        raise UnexpectedTokenError(self.token, "variable identifier")
    return None
def binary_op(self, operand_func: Callable[[], Value], operators: Dict[Tokens, Value]):
operand = operand_func()
@ -80,8 +92,10 @@ class Parser:
return v
elif num := self.number():
return num
elif variable := self.variable():
return variable
else:
raise UnexpectedTokenError(self.token, "parenthesized expression or number")
raise UnexpectedTokenError(self.token, "parenthesized expression, number or variable")
def term(self) -> Value:
return self.binary_op(self.factor, operators={
@ -95,9 +109,19 @@ class Parser:
Tokens.Op_Minus: Sub,
})
def assignment(self, mandatory: bool = False) -> Assignment | None:
    """Parse ``identifier = expression`` into an ``Assignment`` node.

    Returns None when no identifier is found and ``mandatory`` is false.

    Raises:
        UnexpectedTokenError: if no identifier is found and ``mandatory`` is true.
    """
    # Ask for an optional identifier, as variable() does. Forwarding
    # `mandatory` made identifier() raise first, so the "assignment" error
    # below could never be reached.
    ident = self.identifier(mandatory=False)
    if ident:
        self.expect(Tokens.Equal)
        expr = self.expression()
        return Assignment(ident, expr)
    if mandatory:
        raise UnexpectedTokenError(self.token, "assignment")
    return None
def expression(self) -> Value:
    """Parse an assignment or a summation and wrap it in an ``Expression``.

    An assignment is chosen whenever ``peek`` reports an identifier token;
    otherwise a summation is parsed.
    """
    # NOTE(review): an expression that merely starts with a variable
    # (e.g. `a + 1`) is also routed to assignment(), which then expects
    # Tokens.Equal -- presumably that input is rejected; confirm.
    inner = self.assignment() if self.peek(Tokens.Identifier) else self.summation()
    return Expression(inner)
def root(self) -> Node:
    """Parse the top-level construct, which is a single expression."""
    tree = self.expression()
    return tree

View file

@ -1,9 +1,69 @@
from __future__ import annotations
from . import nodes
from .logger import Logger
logger = Logger(__name__)
class Variable:
    """A named variable owned by a context, with the values assigned to it."""

    def __init__(self, context: Context, name: str, value: nodes.Value | None = None):
        self.context = context
        self.name = name
        # The first definition is stored as-is, including the None default.
        self.definitions = [value]

    def fully_qualified_name(self) -> str:
        """Return the owning context's fully qualified name, a dot, then the name."""
        owner = self.context.fully_qualified_name()
        return f"{owner}.{self.name}"

    def __repr__(self):
        rendered = ", ".join(map(repr, self.definitions))
        return f"{self.__class__.__name__}({self.name}) [definitions: {rendered}]"
class Context:
    """A scope mapping variable names to variables, with an optional parent scope."""

    # Class-wide counter; every new context takes the current value for its name.
    _id_sequence = 0

    def __init__(self, name: str | None = None, parent: Context | None = None):
        self.parent = parent
        self.variables: dict[str, Variable] = {}
        sequence = Context._id_sequence
        # Unnamed contexts are named by the bare sequence number.
        self.name = str(sequence) if name is None else f"{name}_{sequence}"
        Context._id_sequence = sequence + 1

    def fully_qualified_name(self) -> str:
        """Return this context's name prefixed by its ancestors', joined with ``::``."""
        if self.parent is not None:
            return f"{self.parent.fully_qualified_name()}::{self.name}"
        return str(self.name)

    def get_variable(self, name: str) -> Variable | None:
        """Find ``name`` here or in the nearest ancestor; return None if absent."""
        if name in self.variables:
            return self.variables[name]
        if self.parent is None:
            return None
        return self.parent.get_variable(name)

    def set_variable(self, name: str, value: nodes.Value) -> Variable:
        """Record ``value`` as a definition of ``name`` in this context.

        A new variable is created on first use; later calls append to the
        existing variable's definitions. Parent contexts are not consulted.
        """
        try:
            variable = self.variables[name]
        except KeyError:
            variable = self.variables[name] = Variable(self, name, value)
        else:
            variable.definitions.append(value)
        return variable

    def __repr__(self) -> str:
        lines = [f"{self.__class__.__name__}(id={self.name})"]
        if self.parent is not None:
            lines.append(f"\tParent ID: {self.parent.name}")
        if self.variables:
            lines.append(f"\tVariables ({len(self.variables)}):")
            lines.extend(
                f"\t\t- {key!r} : {', '.join(map(repr, var.definitions))}"
                for key, var in self.variables.items()
            )
        return "\n".join(lines)

View file

@ -33,6 +33,8 @@ class Tokens(enum.Enum):
Op_Divide = re.compile(r"/")
Parens_Left = re.compile(r"\(")
Parens_Right = re.compile(r"\)")
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
Equal = re.compile(r"=")
Newline = re.compile(r"\n", flags=re.MULTILINE)
EOF = re.compile(r"\Z")
Blank = re.compile(r"[ \t]+")