nodes+parser+semantic+ir: add variables

References + assignments
This commit is contained in:
Antoine Viallon 2023-05-09 01:54:47 +02:00
parent e9324f4f71
commit 223c3be819
Signed by: aviallon
GPG key ID: D126B13AB555E16F
6 changed files with 207 additions and 50 deletions

View file

@ -12,8 +12,8 @@ from .tokenizer import Tokenizer, Tokens
def main(): def main():
data = """ data = """
2 + 3 / (8 - 1 + 3) * 1 2 + 8 - 1 * (byte = 3 + 5)
+ 34.2 / (byte = 255) + byte
""" """
print("Source:\n", data) print("Source:\n", data)
tokenizer = Tokenizer() tokenizer = Tokenizer()
@ -28,8 +28,11 @@ def main():
ast = parser.parse() ast = parser.parse()
ast.pprint(depth=10) ast.pprint(depth=10)
context = semantic.Context() context = semantic.Context("root")
intermediate_representation = ast.intermediate_representation(context=context) ast.semantic_analysis(context)
print(context)
intermediate_representation = ast.intermediate_representation()
messages = [] messages = []
for ir_item in intermediate_representation: for ir_item in intermediate_representation:

View file

@ -39,7 +39,7 @@ class IRValue(IRItem, abc.ABC):
class IRMove(IRAction): class IRMove(IRAction):
@beartype @beartype
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue): def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
super().__init__(location) super().__init__(location)
self.dest = dest self.dest = dest
self.source = source self.source = source
@ -61,7 +61,11 @@ class IRImmediate(IRValue):
return f"{self.value}" return f"{self.value}"
class IRRegister(IRValue): class IRAssignable(IRValue, metaclass=abc.ABCMeta):
pass
class IRRegister(IRAssignable):
register_id = 0 register_id = 0
def __init__(self, location: SourceLocation): def __init__(self, location: SourceLocation):
@ -73,10 +77,19 @@ class IRRegister(IRValue):
return f"%r{self.id}" return f"%r{self.id}"
class IRVariable(IRAssignable):
    """Assignable IR value that designates a named variable.

    The variable is identified by its fully qualified identifier, which is
    stored verbatim and used as-is when generating code.
    """

    def __init__(self, location: SourceLocation, fq_identifier: str):
        """Create a variable operand.

        :param location: source location forwarded to the parent constructor.
        :param fq_identifier: fully qualified name of the variable.
        """
        super().__init__(location)
        self.fq_identifier = fq_identifier

    def codegen(self) -> str:
        """Return the textual form of the operand: ``@`` followed by the identifier."""
        rendered = f"@{self.fq_identifier}"
        return rendered
class IRAdd(IRAction): class IRAdd(IRAction):
@beartype @beartype
def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue): def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue):
super().__init__(location) super().__init__(location)
assert all(isinstance(v, IRValue) for v in values) assert all(isinstance(v, IRValue) for v in values)
@ -94,7 +107,7 @@ class IRAdd(IRAction):
class IRMul(IRAction): class IRMul(IRAction):
@beartype @beartype
def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue): def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue):
super().__init__(location) super().__init__(location)
assert all(isinstance(v, IRValue) for v in values) assert all(isinstance(v, IRValue) for v in values)
@ -112,7 +125,7 @@ class IRMul(IRAction):
class IRNegation(IRAction): class IRNegation(IRAction):
@beartype @beartype
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue): def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
super().__init__(location) super().__init__(location)
self.source = source self.source = source
@ -128,7 +141,7 @@ class IRNegation(IRAction):
class IRInvert(IRAction): class IRInvert(IRAction):
@beartype @beartype
def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue): def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue):
super().__init__(location) super().__init__(location)
self.source = source self.source = source

View file

@ -16,8 +16,11 @@ logger = Logger(__name__)
class Node: class Node:
def __init__(self):
    """Initialise the state shared by every AST node."""
    # Semantic context attached to this node; stays None until
    # semantic_analysis() assigns it.
    self.context: semantic.Context | None = None
@abstractmethod @abstractmethod
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
raise NotImplementedError(f"Please override {__name__}") raise NotImplementedError(f"Please override {__name__}")
@functools.cache @functools.cache
@ -62,13 +65,19 @@ class Node:
return result return result
@abstractmethod @abstractmethod
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
raise NotImplementedError(f"Please implement {self.__class__.__name__}.{__name__}") raise NotImplementedError(f"Please implement {self.__class__.__name__}.{__name__}")
def semantic_analysis(self, context: semantic.Context):
    """Run semantic analysis on every child node, then record the context.

    Entries of ``_values()`` that are not ``Node`` instances are ignored.
    The context is stored on this node only after all children have been
    analysed.

    :param context: semantic context handed down to each child node.
    """
    logger.debug(f"Doing semantic analysis in {self}")
    # Lazy filter: each child is type-checked right before it is visited.
    child_nodes = (child for child in self._values() if isinstance(child, Node))
    for child in child_nodes:
        child.semantic_analysis(context)
    self.context = context
@staticmethod @staticmethod
def _prepare_sources_ir(result: list[ir.IRAction], def _prepare_sources_ir(result: list[ir.IRAction], values: Iterable[Value]) -> list[ir.IRValue]:
context: semantic.Context, values: Iterable[Value]) -> list[ir.IRValue]: vals = [value.intermediate_representation() for value in values]
vals = [value.intermediate_representation(context) for value in values]
for value in vals: for value in vals:
result += value result += value
@ -77,19 +86,20 @@ class Node:
class Literal(Node, ABC): class Literal(Node, ABC):
def __init__(self, location: SourceLocation, value: Any): def __init__(self, location: SourceLocation, value: Any):
super().__init__()
self.value = value self.value = value
self.loc = location self.loc = location
def location(self) -> SourceLocation: def location(self) -> SourceLocation:
return self.loc return self.loc
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return self.value return [self.value]
def _pprint(self, depth: int | None, indent: str, _depth: int = 0) -> list[str]: def _pprint(self, depth: int | None, indent: str, _depth: int = 0) -> list[str]:
return [f"{indent}{repr(self)}"] return [f"{indent}{repr(self)}"]
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
dest = ir.IRRegister(location=self.location()) dest = ir.IRRegister(location=self.location())
immediate = ir.IRImmediate(location=self.location(), value=self.value) immediate = ir.IRImmediate(location=self.location(), value=self.value)
result = [ir.IRMove(location=self.location(), dest=dest, source=immediate)] result = [ir.IRMove(location=self.location(), dest=dest, source=immediate)]
@ -97,30 +107,31 @@ class Literal(Node, ABC):
class Sum(Node): class Sum(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
result: list[ir.IRAction] = [] result: list[ir.IRAction] = []
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) values_results = Node._prepare_sources_ir(result=result, values=self.values)
dest = ir.IRRegister(location=self.location()) dest = ir.IRRegister(location=self.location())
result += [ir.IRAdd(self.location(), dest, *values_results)] result += [ir.IRAdd(self.location(), dest, *values_results)]
return result return result
def __init__(self, *values: Value): def __init__(self, *values: Value):
super().__init__()
self.values = values self.values = values
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return self.values return list(self.values)
class Sub(Node): class Sub(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
result: list[ir.IRAction] = [] result: list[ir.IRAction] = []
first_val = self.first_value.intermediate_representation(context) first_val = self.first_value.intermediate_representation()
result += first_val result += first_val
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) values_results = Node._prepare_sources_ir(result=result, values=self.values)
for i, value_result in enumerate(values_results): for i, value_result in enumerate(values_results):
d = ir.IRRegister(location=self.location()) d = ir.IRRegister(location=self.location())
@ -134,37 +145,39 @@ class Sub(Node):
return result return result
def __init__(self, first_value: Value, *values: Value): def __init__(self, first_value: Value, *values: Value):
super().__init__()
self.first_value = first_value self.first_value = first_value
self.values = values self.values = values
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return [self.first_value] + list(self.values) return [self.first_value] + list(self.values)
class Product(Node): class Product(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
result: list[ir.IRAction] = [] result: list[ir.IRAction] = []
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) values_results = Node._prepare_sources_ir(result=result, values=self.values)
dest = ir.IRRegister(location=self.location()) dest = ir.IRRegister(location=self.location())
result += [ir.IRMul(self.location(), dest, *values_results)] result += [ir.IRMul(self.location(), dest, *values_results)]
return result return result
def __init__(self, *values: Value): def __init__(self, *values: Value):
super().__init__()
self.values = values self.values = values
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return self.values return list(self.values)
class Division(Node): class Division(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
result: list[ir.IRAction] = [] result: list[ir.IRAction] = []
first_val = self.first_value.intermediate_representation(context) first_val = self.first_value.intermediate_representation()
result += first_val result += first_val
values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) values_results = Node._prepare_sources_ir(result=result, values=self.values)
for i, value_result in enumerate(values_results): for i, value_result in enumerate(values_results):
d = ir.IRRegister(location=self.location()) d = ir.IRRegister(location=self.location())
@ -178,10 +191,11 @@ class Division(Node):
return result return result
def __init__(self, first_value: Value, *values: Value): def __init__(self, first_value: Value, *values: Value):
super().__init__()
self.first_value = first_value self.first_value = first_value
self.values = values self.values = values
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return [self.first_value] + list(self.values) return [self.first_value] + list(self.values)
@ -202,35 +216,76 @@ class Integer(Literal):
class Expression(Node): class Expression(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
return self.node.intermediate_representation(context) return self.node.intermediate_representation()
def __init__(self, node: Node): def __init__(self, node: Node):
super().__init__()
self.node = node self.node = node
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return self.node return [self.node]
def semantic_analysis(self, context: semantic.Context):
    """Delegate semantic analysis to the wrapped node and record the context.

    The base ``Node.semantic_analysis`` stores the context on the node once
    its children are analysed; this override does the same so that
    ``self.context`` is populated for expressions too.

    :param context: semantic context passed on to the wrapped node.
    :return: whatever the wrapped node's ``semantic_analysis`` returns.
    """
    result = self.node.semantic_analysis(context)
    # Mirror the base implementation: attach the context after the child ran.
    self.context = context
    return result
def location(self) -> SourceLocation: def location(self) -> SourceLocation:
return self.node.location() return self.node.location()
class Identifier(Literal): class Identifier(Literal):
def __init__(self, name: str): def __init__(self, location: SourceLocation, name: str):
super().__init__(name) super().__init__(location, name)
self.value: str
class Variable(Literal):
    """AST node for a reference to a variable by name.

    ``value`` starts as ``None`` and is replaced during semantic analysis by
    the object returned from ``context.get_variable``.
    """

    def __init__(self, location: SourceLocation, identifier: Identifier):
        """Create an unresolved variable reference.

        :param location: source location of the reference.
        :param identifier: identifier node naming the variable.
        """
        # No value yet: it is linked by semantic_analysis().
        super().__init__(location, None)
        self.identifier = identifier

    def semantic_analysis(self, context: semantic.Context):
        """Resolve the reference against ``context``.

        :param context: semantic context used for the lookup.
        :raises SemanticAnalysisError: if the lookup returns ``None``.
        """
        name = self.identifier.value
        variable = context.get_variable(name)
        if variable is None:
            raise SemanticAnalysisError(location=self.location(), message=f"Unknown variable '{name}'")
        self.value = variable
        # Keep the node's context populated, as Node.semantic_analysis does.
        self.context = context
        logger.debug(f"Linked variable reference to var {variable}")

    def intermediate_representation(self) -> list[ir.IRItem]:
        """Return a single ``IRVariable`` naming the resolved variable.

        Semantic analysis must have run first so that ``value`` is linked.
        """
        # Same invariant style as Assignment: fail loudly if unresolved.
        assert self.value is not None
        return [ir.IRVariable(location=self.location(), fq_identifier=self.value.fully_qualified_name())]
class Assignment(Node): class Assignment(Node):
def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: def intermediate_representation(self) -> list[ir.IRItem]:
return self.value.intermediate_representation(context) assert self.variable is not None
result: list[ir.IRItem] = []
value = self.value.intermediate_representation()
result += value
dest = ir.IRVariable(location=self.location(), fq_identifier=self.variable.fully_qualified_name())
result += [ir.IRMove(location=self.location(), dest=dest.destination(), source=value[-1].destination())]
return result
def __init__(self, identifier: Identifier, value: Value): def __init__(self, identifier: Identifier, value: Value):
super().__init__()
self.identifier = identifier self.identifier = identifier
self.value = value self.value = value
self.variable: semantic.Variable | None = None
def _values(self) -> Any | list[Node]: def _values(self) -> list[Node | Any]:
return [self.identifier, self.value] return [self.identifier, self.value]
def semantic_analysis(self, context: semantic.Context):
    """Analyse the child nodes, then register the assignment in ``context``.

    The parent implementation runs first; afterwards the assigned value is
    recorded under the identifier's name via ``context.set_variable`` and
    the returned object is kept in ``self.variable``.

    :param context: semantic context receiving the variable definition.
    """
    super(Assignment, self).semantic_analysis(context)
    variable = context.set_variable(self.identifier.value, value=self.value)
    self.variable = variable
    logger.debug(f"Added variable {variable} to context {context.fully_qualified_name()}")
Number = Float | Integer Number = Float | Integer
Value = BinaryOperation | Number Value = BinaryOperation | Number | Variable

View file

@ -4,8 +4,8 @@ from beartype.typing import List, Dict, Callable
from .errors import CompilationError, UnexpectedTokenError from .errors import CompilationError, UnexpectedTokenError
from .logger import Logger from .logger import Logger
from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression, Identifier, Assignment, \
from .source import SourceLocation Variable
from .tokenizer import Tokens, Token from .tokenizer import Tokens, Token
logger = Logger(__name__) logger = Logger(__name__)
@ -62,6 +62,18 @@ class Parser:
elif mandatory: elif mandatory:
raise UnexpectedTokenError(self.token, "integer or float") raise UnexpectedTokenError(self.token, "integer or float")
def identifier(self, mandatory: bool = False) -> Identifier | None:
    """Parse an identifier token into an ``Identifier`` node.

    :param mandatory: when true, a missing identifier is an error.
    :return: the identifier node, or ``None`` when no identifier token was
        accepted and ``mandatory`` is false.
    :raises UnexpectedTokenError: if no identifier token was accepted and
        ``mandatory`` is true.
    """
    # NOTE(review): accept() presumably consumes the token on a match — confirm.
    ident = self.accept(Tokens.Identifier)
    if ident:
        return Identifier(location=ident.loc, name=str(ident.value))
    if mandatory:
        raise UnexpectedTokenError(self.token, "identifier")
    return None
def variable(self, mandatory: bool = False) -> Variable | None:
    """Parse a variable reference, i.e. an identifier wrapped in ``Variable``.

    :param mandatory: when true, a missing identifier is an error.
    :return: the variable node, or ``None`` when no identifier was parsed
        and ``mandatory`` is false.
    :raises UnexpectedTokenError: if no identifier was parsed and
        ``mandatory`` is true.
    """
    # The identifier lookup is never mandatory here, so that the error
    # raised below can say "variable identifier".
    ident = self.identifier(mandatory=False)
    if ident:
        return Variable(location=ident.location(), identifier=ident)
    if mandatory:
        raise UnexpectedTokenError(self.token, "variable identifier")
    return None
def binary_op(self, operand_func: Callable[[], Value], operators: Dict[Tokens, Value]): def binary_op(self, operand_func: Callable[[], Value], operators: Dict[Tokens, Value]):
operand = operand_func() operand = operand_func()
@ -80,8 +92,10 @@ class Parser:
return v return v
elif num := self.number(): elif num := self.number():
return num return num
elif variable := self.variable():
return variable
else: else:
raise UnexpectedTokenError(self.token, "parenthesized expression or number") raise UnexpectedTokenError(self.token, "parenthesized expression, number or variable")
def term(self) -> Value: def term(self) -> Value:
return self.binary_op(self.factor, operators={ return self.binary_op(self.factor, operators={
@ -95,9 +109,19 @@ class Parser:
Tokens.Op_Minus: Sub, Tokens.Op_Minus: Sub,
}) })
def assignment(self, mandatory: bool = False) -> Assignment | None:
    """Parse ``identifier = expression`` into an ``Assignment`` node.

    :param mandatory: when true, a missing leading identifier is an error.
    :return: the assignment node, or ``None`` when no identifier was parsed
        and ``mandatory`` is false.
    :raises UnexpectedTokenError: if no identifier was parsed and
        ``mandatory`` is true.
    """
    # Look the identifier up non-mandatorily (as variable() does): passing
    # `mandatory` through made identifier() raise first, which left the
    # "assignment" error below unreachable.
    ident = self.identifier(mandatory=False)
    if ident:
        self.expect(Tokens.Equal)
        expr = self.expression()
        return Assignment(ident, expr)
    if mandatory:
        raise UnexpectedTokenError(self.token, "assignment")
    return None
def expression(self) -> Value: def expression(self) -> Value:
summation = self.summation() if self.peek(Tokens.Identifier):
return Expression(summation) return Expression(self.assignment())
else:
return Expression(self.summation())
def root(self) -> Node: def root(self) -> Node:
return self.expression() return self.expression()

View file

@ -1,9 +1,69 @@
from __future__ import annotations from __future__ import annotations
from . import nodes
from .logger import Logger from .logger import Logger
logger = Logger(__name__) logger = Logger(__name__)
class Variable:
    """A named variable owned by a context, with the values assigned to it."""

    def __init__(self, context: Context, name: str, value: nodes.Value | None = None):
        """Create a variable.

        :param context: context that owns the variable.
        :param name: unqualified variable name.
        :param value: first definition, if any. When omitted, the variable
            starts with no definitions rather than a ``None`` entry.
        """
        self.context = context
        self.name = name
        # Only real values count as definitions.
        self.definitions: list[nodes.Value] = [] if value is None else [value]

    def fully_qualified_name(self) -> str:
        """Return the owning context's fully qualified name, a dot, then the name."""
        return f"{self.context.fully_qualified_name()}.{self.name}"

    def __repr__(self):
        definitions = ", ".join(repr(d) for d in self.definitions)
        return f"{self.__class__.__name__}({self.name}) [definitions: {definitions}]"
class Context:
    """A naming scope holding variables, optionally nested in a parent scope."""

    # Class-wide counter; every new context takes the current value as part
    # of its name and then increments it.
    _id_sequence = 0

    def __init__(self, name: str | None = None, parent: Context | None = None):
        """Create a context.

        :param name: optional prefix; the context is named ``<name>_<n>``,
            or just ``<n>`` when omitted, where ``n`` is the current counter.
        :param parent: enclosing context, if any.
        """
        sequence = Context._id_sequence
        self.name = str(sequence) if name is None else f"{name}_{sequence}"
        self.parent = parent
        self.variables: dict[str, Variable] = {}
        Context._id_sequence = sequence + 1

    def fully_qualified_name(self) -> str:
        """Return the ``::``-joined chain of names from the outermost context."""
        if self.parent is not None:
            return f"{self.parent.fully_qualified_name()}::{self.name}"
        return str(self.name)

    def get_variable(self, name: str) -> Variable | None:
        """Look ``name`` up here, then in the parent chain; ``None`` if absent."""
        try:
            return self.variables[name]
        except KeyError:
            pass
        if self.parent is None:
            return None
        return self.parent.get_variable(name)

    def set_variable(self, name: str, value: nodes.Value) -> Variable:
        """Record ``value`` as a definition of ``name`` in this context.

        A new ``Variable`` is created when the name is not yet known in this
        context; otherwise the value is added to the existing definitions.

        :return: the variable that received the definition.
        """
        if name not in self.variables:
            created = Variable(self, name, value)
            self.variables[name] = created
            return created
        known = self.variables[name]
        known.definitions.append(value)
        return known

    def __repr__(self) -> str:
        lines = [f"{self.__class__.__name__}(id={self.name})"]
        if self.parent is not None:
            lines.append(f"\tParent ID: {self.parent.name}")
        if self.variables:
            lines.append(f"\tVariables ({len(self.variables)}):")
            for key, value in self.variables.items():
                definitions = (repr(e) for e in value.definitions)
                lines.append(f"\t\t- {repr(key)} : {', '.join(definitions)}")
        return "\n".join(lines)

View file

@ -33,6 +33,8 @@ class Tokens(enum.Enum):
Op_Divide = re.compile(r"/") Op_Divide = re.compile(r"/")
Parens_Left = re.compile(r"\(") Parens_Left = re.compile(r"\(")
Parens_Right = re.compile(r"\)") Parens_Right = re.compile(r"\)")
Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*")
Equal = re.compile(r"=")
Newline = re.compile(r"\n", flags=re.MULTILINE) Newline = re.compile(r"\n", flags=re.MULTILINE)
EOF = re.compile(r"\Z") EOF = re.compile(r"\Z")
Blank = re.compile(r"[ \t]+") Blank = re.compile(r"[ \t]+")