compiler/compiler/semantic.py

249 lines
9 KiB
Python

from __future__ import annotations
import abc
from typing import Literal
from . import nodes, lexer, source
from .errors import SemanticAnalysisError, CompilationWarning
from .logger import Logger, Tracer, LogLevel
from .typechecking import typecheck
# Module-level logger, plus a tracer that reports through it at Debug level.
logger = Logger(__name__)
tracer = Tracer(logger=logger, level=LogLevel.Debug)
# Placeholder node standing in for compiler-provided symbols: SymbolABC uses it
# as both the definition and the initial write of every symbol created with
# builtin=True. Its token and source location are tagged "__compiler_internal__".
builtin_node = nodes.PseudoNode(
lexer.Token(
kind=lexer.Tokens.Unknown,
value="__compiler_internal__",
loc=source.SourceLocation(begin=source.Location(internal=True), source="__compiler_internal__")
)
)
class SymbolABC(abc.ABC):
    """Base class for a named symbol owned by a context.

    A symbol keeps the node that defined it plus two lists of nodes: the
    ones that wrote to it (``writes``) and the ones that read it (``reads``).
    """

    def __init__(self, context: Context, name: str,
                 definition: nodes.Node | None = None,
                 value: nodes.Node | None | Literal["builtin"] = None,
                 builtin: bool = False):
        """Create the symbol.

        Args:
            context: Context that owns the symbol.
            name: Unqualified symbol name.
            definition: Node that declared the symbol, if any.
            value: Initial value; recorded as the first write only when it
                is an actual node (``None`` and ``"builtin"`` are ignored).
            builtin: When true, ``builtin_node`` is recorded as a write and
                replaces ``definition``.
        """
        self.context = context
        self.name = name
        self.writes: list[nodes.Node] = []
        if isinstance(value, nodes.Node):
            self.writes.append(value)
        self.definition = definition
        if builtin:
            self.writes.append(builtin_node)
            self.definition = builtin_node
        self.reads: list[nodes.Node] = []
        # Set while __repr__ is running so a nested repr() of the same
        # symbol falls back to the short str() form.
        self._repr_guard: bool = False

    def fully_qualified_name(self) -> str:
        """Return the owning context's qualified name, a dot, then this name."""
        return f"{self.context.fully_qualified_name()}.{self.name}"

    def __str__(self):
        return f"{self.__class__.__name__}({self.name})"

    def __repr__(self):
        """Return ``str(self)`` followed by the write and read locations."""
        if self._repr_guard:
            return str(self)
        self._repr_guard = True
        try:
            writes = [str(node.location().begin) for node in self.writes]
            reads = [str(node.location().begin) for node in self.reads]
        finally:
            # Always release the guard: if location() raises, later repr()
            # calls must not be stuck on the short form.
            self._repr_guard = False
        return f"{str(self)} [writes: {', '.join(writes)}; reads: {', '.join(reads)}]"
class Type(SymbolABC):
    """Symbol representing a type known to a context."""

    def __init__(self, context: Context, name: str,
                 definition: nodes.Node | None = None,
                 value: nodes.Value | None | Literal["builtin"] = None,
                 builtin: bool = False):
        """Create the type symbol; a builtin type always gets the value ``"builtin"``."""
        effective_value = "builtin" if builtin else value
        super().__init__(
            context,
            name,
            definition=definition,
            value=effective_value,
            builtin=builtin,
        )
class Function(Type):
    """Marker subclass of :class:`Type`; it defines no members of its own."""
class Variable(SymbolABC):
    """Symbol representing a variable together with its type symbol."""

    @typecheck
    def __init__(self, context: Context, name: str,
                 definition: nodes.Definition | None = None,
                 value: nodes.Value | None = None,
                 typedef: Type | None = None,
                 builtin: bool = False):
        """Create the variable.

        When no ``typedef`` is supplied, the type is looked up in ``context``
        under the name ``"__unknown"``.
        """
        super().__init__(context, name, definition=definition, value=value, builtin=builtin)
        if typedef is None:
            typedef = context.get_type("__unknown")
        self.type = typedef

    def __str__(self):
        class_name = self.__class__.__name__
        return f"{class_name}({self.name} : {self.type.fully_qualified_name()})"
class Context:
    """A scope holding variables, types and nested child contexts.

    Lookups (``get_variable``, ``get_type``) fall back to the parent chain;
    ``define_variable``, ``set_variable`` and ``set_type`` operate on this
    context's own tables.
    """

    # Shared counter; its current value is baked into every context's name.
    _id_sequence = 0

    def __init__(self, name: str | None = None, parent: Context | None = None):
        """Create a context named ``"<name>_<n>"``, or ``"<n>"`` when unnamed.

        The parent is only stored; registering this context as a child is
        done separately through ``add_context``.
        """
        self.parent = parent
        self.variables: dict[str, Variable] = {}
        self.types: dict[str, Type] = {}
        self.child_contexts: dict[str, Context] = {}
        sequence = Context._id_sequence
        self.name = str(sequence) if name is None else f"{name}_{sequence}"
        Context._id_sequence += 1

    def fully_qualified_name(self) -> str:
        """Return the names from the root context down to this one, joined by ``::``."""
        if self.parent is None:
            return str(self.name)
        return f"{self.parent.fully_qualified_name()}::{self.name}"

    @typecheck
    def _resolve_symbol(self, attribute_name: str, name: str) -> SymbolABC | None:
        """Look ``name`` up in the table ``attribute_name``, then in the parents.

        Returns ``None`` when no context in the chain knows the name.
        """
        symbols = getattr(self, attribute_name)
        if name in symbols:
            return symbols[name]
        if self.parent is None:
            return None
        return self.parent._resolve_symbol(attribute_name, name)

    @typecheck
    def _get_symbol(self, attribute_name: str, name: str, reader: nodes.Node | None = None) -> SymbolABC | None:
        """Resolve a symbol and, when ``reader`` is given, record it as a read."""
        symbol = self._resolve_symbol(attribute_name, name)
        if symbol is not None and reader is not None:
            symbol.reads.append(reader)
        return symbol

    def get_variable(self, name: str, reader: nodes.Node | None = None) -> Variable | None:
        """Return the visible variable called ``name`` (or ``None``), recording ``reader``."""
        return self._get_symbol("variables", name, reader)

    def define_variable(self, name: str,
                        definition: nodes.Definition,
                        type_identifier: nodes.Identifier, value: nodes.Value) -> Variable:
        """Declare a new variable in this context.

        Emits a ``CompilationWarning`` when the name shadows a variable of
        a parent context.

        Raises:
            SemanticAnalysisError: if the name is already defined in this
                context, or if the type named by ``type_identifier`` cannot
                be resolved.
        """
        if name in self.variables:
            raise SemanticAnalysisError(value.location(),
                                        message=f"Variable '{name}' is already defined in this scope")
        # The name is not in this context, so any hit comes from a parent.
        shadowed = self._get_symbol("variables", name)
        if shadowed is not None:
            # A variable may have been created without a definition node.
            if shadowed.definition is not None:
                warning = f"Shadowing previous definition {shadowed.definition.location()}"
            else:
                warning = f"Shadowing previous definition of '{name}'"
            CompilationWarning(definition.location(), warning).raise_warning()
        typedef = self.get_type(type_identifier.value)
        if typedef is None:
            raise SemanticAnalysisError(location=type_identifier.location(),
                                        message=f"Unknown type '{type_identifier.value}'")
        variable = Variable(self, name, definition=definition, value=value, typedef=typedef)
        self.variables[name] = variable
        return variable

    def set_variable(self, name: str, value: nodes.Value) -> Variable:
        """Record ``value`` as a new write to an already declared variable.

        Raises:
            SemanticAnalysisError: if ``name`` is not declared in this context.
        """
        assert value is not None
        # NOTE(review): only this context's own table is consulted, so an
        # assignment to a variable declared in a parent context is rejected.
        # Confirm this is intended.
        if name not in self.variables:
            raise SemanticAnalysisError(value.location(),
                                        message=f"Can't assign a value to undeclared variable '{name}'")
        variable = self.variables[name]
        variable.writes.append(value)
        return variable

    @typecheck
    def set_type(self, name: str, value: nodes.Value) -> Type | None:
        """Record ``value`` as a write to type ``name``, creating the type if needed."""
        typedef: Type
        if name in self.types:
            typedef = self.types[name]
            typedef.writes.append(value)
        else:
            typedef = Type(self, name, value=value)
            self.types[name] = typedef
        return typedef

    @typecheck
    def get_type(self, name: str, reader: nodes.Node | None = None) -> Type | None:
        """Return the visible type called ``name`` (or ``None``), recording ``reader``."""
        return self._get_symbol("types", name, reader)

    def add_context(self, context: Context) -> None:
        """Register ``context`` as a child, keyed by its name."""
        self.child_contexts[context.name] = context

    def __str__(self):
        return f"{self.__class__.__name__}(id={repr(self.name)})"

    def _pprint(self, depth: int = 0) -> str:
        """Return a tab-indented, multi-line dump of this context and its children.

        ``depth`` is only forwarded to the recursive calls; nesting is
        rendered by prefixing each child's lines with two tabs.
        """
        lines = [str(self)]
        if self.parent is not None:
            lines.append(f"\tParent ID: {self.parent.name}")
        if self.types:
            lines.append(f"\tTypes ({len(self.types)}):")
            lines.extend(f"\t\t- {repr(symbol)}" for symbol in self.types.values())
        if self.variables:
            lines.append(f"\tVariables ({len(self.variables)}):")
            lines.extend(f"\t\t- {repr(symbol)}" for symbol in self.variables.values())
        if self.child_contexts:
            lines.append(f"\tChild contexts ({len(self.child_contexts)}):")
            for child in self.child_contexts.values():
                child_lines = child._pprint(depth=depth + 1).splitlines(keepends=False)
                lines.extend(f"\t\t{line}" for line in child_lines)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return self._pprint()

    @tracer.trace_method
    def check(self) -> None:
        """Validate variable usage in this context, then in every child context.

        Emits a ``CompilationWarning`` for each variable that has a
        definition but is never read.

        Raises:
            SemanticAnalysisError: if a read has no write located before it.
        """
        for variable in self.variables.values():
            # Check for reads before assignments
            for read in variable.reads:
                read_location = read.location()
                # Defensive: skip a literal "builtin" marker should one
                # ever appear among the writes.
                written_before = any(
                    write != "builtin" and write.location() < read_location
                    for write in variable.writes
                )
                if written_before:
                    continue
                message = f"Reading undefined variable {variable.name}"
                if variable.writes:
                    first_write = min(variable.writes, key=lambda w: w.location())
                    message += f" (first assignment is at {first_write.location().begin})"
                raise SemanticAnalysisError(location=read_location,
                                            message=message)
            # Check for unused variables
            if not variable.reads and variable.definition is not None:
                CompilationWarning(variable.definition.location(),
                                   message=f"Variable '{variable.name}' is unused").raise_warning()
        for context in self.child_contexts.values():
            context.check()
class BuiltinContext(Context):
    """Parentless context pre-populated with the builtin types and variables."""

    def __init__(self):
        """Register the builtin types, then the builtin ``display`` variable."""
        super().__init__(name="builtins", parent=None)
        unknown_type = Type(self, "__unknown", builtin=True)
        function_type = Function(self, "__function", builtin=True)
        uint32_type = Type(self, "uint32", builtin=True)
        self.types = {
            "__unknown": unknown_type,
            "__function": function_type,
            "uint32": uint32_type,
        }
        display = Variable(self, "display", typedef=function_type, builtin=True)
        self.variables = {"display": display}