From 4bc481ed54cea74638dcc3f588a3cb5975839ba6 Mon Sep 17 00:00:00 2001
From: Antoine Viallon
Date: Mon, 8 May 2023 23:14:53 +0200
Subject: [PATCH] ir+semantic: init intermediate representation generation

---
Review notes (not part of the commit message):

* compiler/ir.py, compiler/nodes.py: the NotImplementedError messages
  interpolate `__name__`, which is the module name, not the name of the
  method that has to be overridden.
* compiler/ir.py: IRItem is a plain class (no abc.ABCMeta), so the
  @abstractmethod markers on codegen()/destination() are not enforced
  on it.
* compiler/__main__.py: `source_info.pop(0)` raises IndexError if
  `source_substring` is empty, because "".splitlines() returns [].
* compiler/nodes.py: `functools` is imported, but no line added by this
  patch uses it.

 compiler/__main__.py |  20 +++++-
 compiler/ir.py       | 141 +++++++++++++++++++++++++++++++++++++++++++
 compiler/nodes.py    |  94 +++++++++++++++++++++++++++--
 compiler/semantic.py |   9 +++
 4 files changed, 257 insertions(+), 7 deletions(-)
 create mode 100644 compiler/ir.py
 create mode 100644 compiler/semantic.py

diff --git a/compiler/__main__.py b/compiler/__main__.py
index de80160..7254e27 100644
--- a/compiler/__main__.py
+++ b/compiler/__main__.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import sys
 from pprint import pprint
 
+from . import semantic
 from .logger import rootLogger, LogLevel
 from .parser import Parser, ParsingError
 from .tokenizer import Tokenizer, Tokens
@@ -23,7 +24,24 @@ def main():
     parser = Parser(tokens)
 
     try:
-        parser.parse().pprint(depth=3)
+        ast = parser.parse()
+        ast.pprint(depth=10)
+
+        context = semantic.Context()
+        intermediate_representation = ast.intermediate_representation(context=context)
+
+        messages = []
+        for ir_item in intermediate_representation:
+            ir_item.location.source = data
+            prefix = f"{str(ir_item.location) + ':':<30}"
+            source_info = ir_item.location.source_substring.splitlines(keepends=False)
+            messages += [f"# {prefix} {source_info.pop(0)}"]
+            while len(source_info) > 0:
+                messages += [f"# {' ' * len(prefix)} {source_info.pop(0)}"]
+
+            messages += [f"{repr(ir_item)}\n"]
+
+        print("\n".join(messages))
     except ParsingError as e:
         e.location.source = data
         print(f"{e}\n{e.location.show_in_source()}", file=sys.stderr)
diff --git a/compiler/ir.py b/compiler/ir.py
new file mode 100644
index 0000000..51fdd38
--- /dev/null
+++ b/compiler/ir.py
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import abc
+from abc import abstractmethod
+
+from beartype import beartype
+
+from .logger import Logger
+from .source import SourceLocation
+
+logger = Logger(__name__)
+
+
+class IRItem:
+    def __init__(self, location: SourceLocation):
+        self.location = location
+
+    @abstractmethod
+    def codegen(self) -> str:
+        raise NotImplementedError(f"Please override {self.__class__.__name__}.{__name__}")
+
+    @abstractmethod
+    def destination(self) -> IRValue:
+        raise NotImplementedError(f"Please override {self.__class__.__name__}.{__name__}")
+
+    def __repr__(self):
+        return f"{self.codegen()}"
+
+
+class IRAction(IRItem, abc.ABC):
+    pass
+
+
+class IRValue(IRItem, abc.ABC):
+    def destination(self) -> IRValue:
+        return self
+
+
+class IRMove(IRAction):
+
+    @beartype
+    def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
+        super().__init__(location)
+        self.dest = dest
+        self.source = source
+
+    def destination(self) -> IRValue:
+        return self.dest
+
+    def codegen(self) -> str:
+        return f"MOVE {self.source} -> {self.dest}"
+
+
+class IRImmediate(IRValue):
+    @beartype
+    def __init__(self, location: SourceLocation, value: int | float | str):
+        super().__init__(location)
+        self.value = value
+
+    def codegen(self):
+        return f"{self.value}"
+
+
+class IRRegister(IRValue):
+    register_id = 0
+
+    def __init__(self, location: SourceLocation):
+        super().__init__(location)
+        self.id = IRRegister.register_id
+        IRRegister.register_id += 1
+
+    def codegen(self):
+        return f"%r{self.id}"
+
+
+class IRAdd(IRAction):
+
+    @beartype
+    def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue):
+        super().__init__(location)
+        assert all(isinstance(v, IRValue) for v in values)
+
+        self.values = values
+        self.dest = dest
+
+    def destination(self) -> IRValue:
+        return self.dest
+
+    def codegen(self) -> str:
+        values = [repr(value) for value in self.values]
+        return f"ADD {', '.join(values)} -> {self.dest}"
+
+
+class IRMul(IRAction):
+
+    @beartype
+    def __init__(self, location: SourceLocation, dest: IRRegister, *values: IRValue):
+        super().__init__(location)
+        assert all(isinstance(v, IRValue) for v in values)
+
+        self.values = values
+        self.dest = dest
+
+    def destination(self) -> IRValue:
+        return self.dest
+
+    def codegen(self) -> str:
+        values = [repr(value) for value in self.values]
+        return f"MUL {', '.join(values)} -> {self.dest}"
+
+
+class IRNegation(IRAction):
+
+    @beartype
+    def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
+        super().__init__(location)
+
+        self.source = source
+        self.dest = dest
+
+    def destination(self) -> IRValue:
+        return self.dest
+
+    def codegen(self) -> str:
+        return f"NEG {self.source} -> {self.dest}"
+
+
+class IRInvert(IRAction):
+
+    @beartype
+    def __init__(self, location: SourceLocation, dest: IRRegister, source: IRValue):
+        super().__init__(location)
+
+        self.source = source
+        self.dest = dest
+
+    def destination(self) -> IRValue:
+        return self.dest
+
+    def codegen(self) -> str:
+        return f"INVERT {self.source} -> {self.dest}"
diff --git a/compiler/nodes.py b/compiler/nodes.py
index 4388a29..47f13f7 100644
--- a/compiler/nodes.py
+++ b/compiler/nodes.py
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
-from abc import abstractmethod
-from typing import Any
+import functools
+from abc import abstractmethod, ABC
+from typing import Any, Iterable
 
 from beartype import beartype
 
+from . import ir, semantic
 from .logger import Logger
+from .source import SourceLocation
 
 logger = Logger(__name__)
 
@@ -57,6 +60,19 @@ class Node:
 
         return result
 
+    @abstractmethod
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        raise NotImplementedError(f"Please implement {self.__class__.__name__}.{__name__}")
+
+    @staticmethod
+    def _prepare_sources_ir(result: list[ir.IRAction],
+                            context: semantic.Context, values: Iterable[Value]) -> list[ir.IRValue]:
+        vals = [value.intermediate_representation(context) for value in values]
+        for value in vals:
+            result += value
+
+        return [value[-1].destination() for value in vals]
+
 
 class Literal(Node, ABC):
     def __init__(self, location: SourceLocation, value: Any):
@@ -72,8 +88,23 @@ class Literal(Node, ABC):
     def _pprint(self, depth: int | None, indent: str, _depth: int = 0) -> list[str]:
         return [f"{indent}{repr(self)}"]
 
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        dest = ir.IRRegister(location=self.location())
+        immediate = ir.IRImmediate(location=self.location(), value=self.value)
+        result = [ir.IRMove(location=self.location(), dest=dest, source=immediate)]
+        return result
+
 
 class Sum(Node):
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        result: list[ir.IRAction] = []
+
+        values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
+
+        dest = ir.IRRegister(location=self.location())
+        result += [ir.IRAdd(self.location(), dest, *values_results)]
+        return result
+
     def __init__(self, *values: Value):
         self.values = values
 
@@ -82,14 +113,42 @@ class Sum(Node):
 
 
 class Sub(Node):
-    def __init__(self, *values: Value):
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        result: list[ir.IRAction] = []
+
+        first_val = self.first_value.intermediate_representation(context)
+        result += first_val
+
+        values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
+
+        for i, value_result in enumerate(values_results):
+            d = ir.IRRegister(location=self.location())
+            result += [ir.IRNegation(location=self.location(), dest=d, source=value_result)]
+            values_results[i] = result[-1].destination()
+
+        values_results += [first_val[-1].destination()]
+
+        dest = ir.IRRegister(location=self.location())
+        result += [ir.IRAdd(self.location(), dest, *values_results)]
+        return result
+
+    def __init__(self, first_value: Value, *values: Value):
+        self.first_value = first_value
         self.values = values
 
     def _values(self) -> Any | list[Node]:
-        return self.values
+        return [self.first_value] + list(self.values)
 
 
 class Product(Node):
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        result: list[ir.IRAction] = []
+
+        values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
+        dest = ir.IRRegister(location=self.location())
+        result += [ir.IRMul(self.location(), dest, *values_results)]
+        return result
+
     def __init__(self, *values: Value):
         self.values = values
 
@@ -98,11 +157,31 @@ class Product(Node):
 
 
 class Division(Node):
-    def __init__(self, *values: Value):
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        result: list[ir.IRAction] = []
+
+        first_val = self.first_value.intermediate_representation(context)
+        result += first_val
+
+        values_results = Node._prepare_sources_ir(result=result, context=context, values=self.values)
+
+        for i, value_result in enumerate(values_results):
+            d = ir.IRRegister(location=self.location())
+            result += [ir.IRInvert(location=self.location(), dest=d, source=value_result)]
+            values_results[i] = result[-1].destination()
+
+        values_results += [first_val[-1].destination()]
+
+        dest = ir.IRRegister(location=self.location())
+        result += [ir.IRMul(self.location(), dest, *values_results)]
+        return result
+
+    def __init__(self, first_value: Value, *values: Value):
+        self.first_value = first_value
         self.values = values
 
     def _values(self) -> Any | list[Node]:
-        return self.values
+        return [self.first_value] + list(self.values)
 
 
 BinaryOperation = Sum | Sub | Product | Division
@@ -122,6 +201,9 @@ class Integer(Literal):
 
 
 class Expression(Node):
+    def intermediate_representation(self, context: semantic.Context) -> list[ir.IRItem]:
+        return self.node.intermediate_representation(context)
+
     def __init__(self, node: Node):
         self.node = node
 
diff --git a/compiler/semantic.py b/compiler/semantic.py
new file mode 100644
index 0000000..8296e48
--- /dev/null
+++ b/compiler/semantic.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from .logger import Logger
+
+logger = Logger(__name__)
+
+
+class Context:
+    pass