From 223c3be8192a5457bf088242d0afdc0a00fe2f31 Mon Sep 17 00:00:00 2001 From: Antoine Viallon Date: Tue, 9 May 2023 01:54:47 +0200 Subject: [PATCH] nodes+parser+semantic+ir: add variables References + assignments --- compiler/__main__.py | 11 ++-- compiler/ir.py | 25 ++++++--- compiler/nodes.py | 123 ++++++++++++++++++++++++++++++------------ compiler/parser.py | 34 ++++++++++-- compiler/semantic.py | 62 ++++++++++++++++++++- compiler/tokenizer.py | 2 + 6 files changed, 207 insertions(+), 50 deletions(-) diff --git a/compiler/__main__.py b/compiler/__main__.py index 9005393..556fd32 100644 --- a/compiler/__main__.py +++ b/compiler/__main__.py @@ -12,8 +12,8 @@ from .tokenizer import Tokenizer, Tokens def main(): data = """ - 2 + 3 / (8 - 1 + 3) * 1 - + 34.2 + 2 + 8 - 1 * (byte = 3 + 5) + / (byte = 255) + byte """ print("Source:\n", data) tokenizer = Tokenizer() @@ -28,8 +28,11 @@ def main(): ast = parser.parse() ast.pprint(depth=10) - context = semantic.Context() - intermediate_representation = ast.intermediate_representation(context=context) + context = semantic.Context("root") + ast.semantic_analysis(context) + print(context) + + intermediate_representation = ast.intermediate_representation() messages = [] for ir_item in intermediate_representation: diff --git a/compiler/ir.py b/compiler/ir.py index 51fdd38..03d5a37 100644 --- a/compiler/ir.py +++ b/compiler/ir.py @@ -39,7 +39,7 @@ class IRValue(IRItem, abc.ABC): class IRMove(IRAction): @beartype - def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue): + def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue): super().__init__(location) self.dest = dest self.source = source @@ -61,7 +61,11 @@ class IRImmediate(IRValue): return f"{self.value}" -class IRRegister(IRValue): +class IRAssignable(IRValue, metaclass=abc.ABCMeta): + pass + + +class IRRegister(IRAssignable): register_id = 0 def __init__(self, location: SourceLocation): @@ -73,10 +77,19 @@ class IRRegister(IRValue): return f"%r{self.id}" +class IRVariable(IRAssignable): + def __init__(self, location: SourceLocation, fq_identifier: str): + super().__init__(location) + self.fq_identifier = fq_identifier + + def codegen(self) -> str: + return f"@{self.fq_identifier}" + + class IRAdd(IRAction): @beartype - def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue): + def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue): super().__init__(location) assert all(isinstance(v, IRValue) for v in values) @@ -94,7 +107,7 @@ class IRAdd(IRAction): class IRMul(IRAction): @beartype - def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue): + def __init__(self, location: SourceLocation, dest: IRAssignable, *values: IRValue): super().__init__(location) assert all(isinstance(v, IRValue) for v in values) @@ -112,7 +125,7 @@ class IRMul(IRAction): class IRNegation(IRAction): @beartype - def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue): + def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue): super().__init__(location) self.source = source @@ -128,7 +141,7 @@ class IRNegation(IRAction): class IRInvert(IRAction): @beartype - def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue): + def __init__(self, location: SourceLocation, dest: IRAssignable, source: IRValue): super().__init__(location) self.source = source diff --git a/compiler/nodes.py b/compiler/nodes.py index a7cc9ee..f04b38c 100644 --- a/compiler/nodes.py +++ b/compiler/nodes.py @@ -16,8 +16,11 @@ logger = Logger(__name__) class Node: + def __init__(self): + self.context: semantic.Context | None = None + @abstractmethod - def _values(self) -> Any | list[Node]: + def _values(self) -> list[Node | Any]: raise NotImplementedError(f"Please override {__name__}") @functools.cache @@ -62,13 +65,19 @@ class Node: return result @abstractmethod - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: + def intermediate_representation(self) -> list[ir.IRItem]: raise NotImplementedError(f"Please implement {self.__class__.__name__}.{__name__}") + def semantic_analysis(self, context: semantic.Context): + logger.debug(f"Doing semantic analysis in {self}") + for value in self._values(): + if isinstance(value, Node): + value.semantic_analysis(context) + self.context = context + @staticmethod - def _prepare_sources_ir(result: list[ir.IRAction], - context: semantic.Context, values: Iterable[Value]) -> list[ir.IRValue]: - vals = [value.intermediate_representation(context) for value in values] + def _prepare_sources_ir(result: list[ir.IRAction], values: Iterable[Value]) -> list[ir.IRValue]: + vals = [value.intermediate_representation() for value in values] for value in vals: result += value @@ -77,19 +86,20 @@ class Node: class Literal(Node, ABC): def __init__(self, location: SourceLocation, value: Any): + super().__init__() self.value = value self.loc = location def location(self) -> SourceLocation: return self.loc - def _values(self) -> Any | list[Node]: - return self.value + def _values(self) -> list[Node | Any]: + return [self.value] def _pprint(self, depth: int | None, indent: str, _depth: int = 0) -> list[str]: return [f"{indent}{repr(self)}"] - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: + def intermediate_representation(self) -> list[ir.IRItem]: dest = ir.IRRegister(location=self.location()) immediate = ir.IRImmediate(location=self.location(), value=self.value) result = [ir.IRMove(location=self.location(), dest=dest, source=immediate)] @@ -97,30 +107,31 @@ class Literal(Node, ABC): class Sum(Node): - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: + def intermediate_representation(self) -> list[ir.IRItem]: result: list[ir.IRAction] = [] - values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) + values_results = Node._prepare_sources_ir(result=result, values=self.values) dest = ir.IRRegister(location=self.location()) result += [ir.IRAdd(self.location(), dest, *values_results)] return result def __init__(self, *values: Value): + super().__init__() self.values = values - def _values(self) -> Any | list[Node]: - return self.values + def _values(self) -> list[Node | Any]: + return list(self.values) class Sub(Node): - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: + def intermediate_representation(self) -> list[ir.IRItem]: result: list[ir.IRAction] = [] - first_val = self.first_value.intermediate_representation(context) + first_val = self.first_value.intermediate_representation() result += first_val - values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) + values_results = Node._prepare_sources_ir(result=result, values=self.values) for i, value_result in enumerate(values_results): d = ir.IRRegister(location=self.location()) @@ -134,37 +145,39 @@ class Sub(Node): return result def __init__(self, first_value: Value, *values: Value): + super().__init__() self.first_value = first_value self.values = values - def _values(self) -> Any | list[Node]: + def _values(self) -> list[Node | Any]: return [self.first_value] + list(self.values) class Product(Node): - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: + def intermediate_representation(self) -> list[ir.IRItem]: result: list[ir.IRAction] = [] - values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) + values_results = Node._prepare_sources_ir(result=result, values=self.values) dest = ir.IRRegister(location=self.location()) result += [ir.IRMul(self.location(), dest, *values_results)] return result def __init__(self, *values: Value): + super().__init__() self.values = values - def _values(self) -> Any | list[Node]: - return self.values + def _values(self) -> list[Node | Any]: + return list(self.values) class Division(Node): - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: + def intermediate_representation(self) -> list[ir.IRItem]: result: list[ir.IRAction] = [] - first_val = self.first_value.intermediate_representation(context) + first_val = self.first_value.intermediate_representation() result += first_val - values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values) + values_results = Node._prepare_sources_ir(result=result, values=self.values) for i, value_result in enumerate(values_results): d = ir.IRRegister(location=self.location()) @@ -178,10 +191,11 @@ class Division(Node): return result def __init__(self, first_value: Value, *values: Value): + super().__init__() self.first_value = first_value self.values = values - def _values(self) -> Any | list[Node]: + def _values(self) -> list[Node | Any]: return [self.first_value] + list(self.values) @@ -202,35 +216,76 @@ class Integer(Literal): class Expression(Node): - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: - return self.node.intermediate_representation(context) + def intermediate_representation(self) -> list[ir.IRItem]: + return self.node.intermediate_representation() def __init__(self, node: Node): + super().__init__() self.node = node - def _values(self) -> Any | list[Node]: - return self.node + def _values(self) -> list[Node | Any]: + return [self.node] + + def semantic_analysis(self, context: semantic.Context): + return self.node.semantic_analysis(context) def location(self) -> SourceLocation: return self.node.location() class Identifier(Literal): - def __init__(self, name: str): - super().__init__(name) + def __init__(self, location: SourceLocation, name: str): + super().__init__(location, name) + self.value: str + + +class Variable(Literal): + def __init__(self, location: SourceLocation, identifier: Identifier): + super().__init__(location, None) + self.identifier = identifier + + def semantic_analysis(self, context: semantic.Context): + variable = context.get_variable(self.identifier.value) + if variable is None: + raise SemanticAnalysisError(location=self.location(), message=f"Unknown variable '{self.identifier.value}'") + + self.value = variable + logger.debug(f"Linked variable reference to var {variable}") + + def intermediate_representation(self) -> list[ir.IRItem]: + result = [ir.IRVariable(location=self.location(), fq_identifier=self.value.fully_qualified_name())] + return result class Assignment(Node): - def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]: - return self.value.intermediate_representation(context) + def intermediate_representation(self) -> list[ir.IRItem]: + assert self.variable is not None + + result: list[ir.IRItem] = [] + value = self.value.intermediate_representation() + result += value + + dest = ir.IRVariable(location=self.location(), fq_identifier=self.variable.fully_qualified_name()) + result += [ir.IRMove(location=self.location(), dest=dest.destination(), source=value[-1].destination())] + + return result def __init__(self, identifier: Identifier, value: Value): + super().__init__() self.identifier = identifier self.value = value + self.variable: semantic.Variable | None = None - def _values(self) -> Any | list[Node]: + def _values(self) -> list[Node | Any]: return [self.identifier, self.value] + def semantic_analysis(self, context: semantic.Context): + super(Assignment, self).semantic_analysis(context) + name = self.identifier.value + variable = context.set_variable(name, value=self.value) + self.variable = variable + logger.debug(f"Added variable {variable} to context {context.fully_qualified_name()}") + Number = Float | Integer -Value = BinaryOperation | Number +Value = BinaryOperation | Number | Variable diff --git a/compiler/parser.py b/compiler/parser.py index 533a237..0716d84 100644 --- a/compiler/parser.py +++ b/compiler/parser.py @@ -4,8 +4,8 @@ from beartype.typing import List, Dict, Callable from .errors import CompilationError, UnexpectedTokenError from .logger import Logger -from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression -from .source import SourceLocation +from .nodes import Float, Sum, Value, Product, Node, Division, Sub, Integer, Expression, Identifier, Assignment, \ + Variable from .tokenizer import Tokens, Token logger = Logger(__name__) @@ -62,6 +62,18 @@ class Parser: elif mandatory: raise UnexpectedTokenError(self.token, "integer or float") + def identifier(self, mandatory: bool = False) -> Identifier: + if ident := self.accept(Tokens.Identifier): + return Identifier(location=ident.loc, name=str(ident.value)) + elif mandatory: + raise UnexpectedTokenError(self.token, "identifier") + + def variable(self, mandatory: bool = False) -> Variable: + if ident := self.identifier(mandatory=False): + return Variable(location=ident.location(), identifier=ident) + elif mandatory: + raise UnexpectedTokenError(self.token, "variable identifier") + def binary_op(self, operand_func: Callable[[], Value], operators: Dict[Tokens, Value]): operand = operand_func() @@ -80,8 +92,10 @@ class Parser: return v elif num := self.number(): return num + elif variable := self.variable(): + return variable else: - raise UnexpectedTokenError(self.token, "parenthesized expression or number") + raise UnexpectedTokenError(self.token, "parenthesized expression, number or variable") def term(self) -> Value: return self.binary_op(self.factor, operators={ @@ -95,9 +109,19 @@ class Parser: Tokens.Op_Minus: Sub, }) + def assignment(self, mandatory: bool = False) -> Assignment: + if ident := self.identifier(mandatory): + self.expect(Tokens.Equal) + expr = self.expression() + return Assignment(ident, expr) + elif mandatory: + raise UnexpectedTokenError(self.token, "assignment") + def expression(self) -> Value: - summation = self.summation() - return Expression(summation) + if self.peek(Tokens.Identifier): + return Expression(self.assignment()) + else: + return Expression(self.summation()) def root(self) -> Node: return self.expression() diff --git a/compiler/semantic.py b/compiler/semantic.py index 8296e48..750724a 100644 --- a/compiler/semantic.py +++ b/compiler/semantic.py @@ -1,9 +1,69 @@ from __future__ import annotations +from . import nodes from .logger import Logger logger = Logger(__name__) +class Variable: + def __init__(self, context: Context, name: str, value: nodes.Value | None = None): + self.context = context + self.name = name + self.definitions = [value] + + def fully_qualified_name(self) -> str: + return f"{self.context.fully_qualified_name()}.{self.name}" + + def __repr__(self): + return f"{self.__class__.__name__}({self.name}) [definitions: {', '.join(repr(d) for d in self.definitions)}]" + + class Context: - pass + _id_sequence = 0 + + def __init__(self, name: str | None = None, parent: Context | None = None): + self.parent = parent + self.variables: dict[str, Variable] = {} + self.name = str(Context._id_sequence) + if name is not None: + self.name = f"{name}_{Context._id_sequence}" + Context._id_sequence += 1 + + def fully_qualified_name(self) -> str: + if self.parent is None: + return str(self.name) + return f"{self.parent.fully_qualified_name()}::{self.name}" + + def get_variable(self, name: str) -> Variable | None: + if name in self.variables: + return self.variables[name] + elif self.parent is None: + return None + elif (var := self.parent.get_variable(name)) is not None: + return var + + return None + + def set_variable(self, name: str, value: nodes.Value) -> Variable: + variable: Variable + if name in self.variables: + variable = self.variables[name] + variable.definitions += [value] + else: + variable = Variable(self, name, value) + + self.variables[name] = variable + + return variable + + def __repr__(self) -> str: + result = [f"{self.__class__.__name__}(id={self.name})"] + if self.parent is not None: + result += [f"\tParent ID: {self.parent.name}"] + if len(self.variables) > 0: + result += [f"\tVariables ({len(self.variables)}):"] + for key, value in self.variables.items(): + definitions = (repr(e) for e in value.definitions) + result += [f"\t\t- {repr(key)} : {', '.join(definitions)}"] + return "\n".join(result) diff --git a/compiler/tokenizer.py b/compiler/tokenizer.py index f65cab4..07c0c86 100644 --- a/compiler/tokenizer.py +++ b/compiler/tokenizer.py @@ -33,6 +33,8 @@ class Tokens(enum.Enum): Op_Divide = re.compile(r"/") Parens_Left = re.compile(r"\(") Parens_Right = re.compile(r"\)") + Identifier = re.compile(r"[a-zA-Z_][a-zA-Z_0-9]*") + Equal = re.compile(r"=") Newline = re.compile(r"\n", flags=re.MULTILINE) EOF = re.compile(r"\Z") Blank = re.compile(r"[ \t]+")