From ff2435171cf514f503a32442bf2b3ae0d11d2884 Mon Sep 17 00:00:00 2001
From: Antoine Viallon
Date: Thu, 11 Jan 2024 00:54:02 +0100
Subject: [PATCH] lexer: convert to an iterator

This avoids having to read all the data at once (this will be useful
for the REPL).
---
 compiler/lexer.py | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/compiler/lexer.py b/compiler/lexer.py
index 912d839..a7ab620 100644
--- a/compiler/lexer.py
+++ b/compiler/lexer.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import collections.abc
 import enum
 import re
+import typing
 from dataclasses import dataclass, field
 from typing import cast
 
@@ -60,8 +61,8 @@ class Tokens(enum.Enum):
         return self in [Tokens.KwLet]
 
 
-class Lexer(collections.abc.Sequence):
-    def __init__(self, data: str):
+class Lexer(collections.abc.Iterator):
+    def __init__(self, data: str, token_filter: typing.Callable[[Token], bool] | None = None):
         self.data = data
         self.tokens = []
         self.length: int | None = None
@@ -69,32 +70,32 @@
         self.end: int = 0
         self.character: int = 0
         self.line: int = 0
-
-    def __getitem__(self, index: int) -> Token:
-        while len(self) <= index + 1 and self.length is None:
-            self._next_token()
-        return self.tokens[index]
+        self.token_filter = token_filter
 
     def __next__(self):
-        return self._next_token()
+        return self._filtered_next_token()
 
-    def __len__(self) -> int:
-        while self.length is None:
-            self._next_token()
+    def _filtered_next_token(self) -> Token:
+        tok = self._next_token()
+        if self.token_filter is not None:
+            while not self.token_filter(tok):
+                tok = self._next_token()
 
-        assert self.length is not None
-
-        return self.length
+        logger.debug(f"Returning token: {tok}")
+        return tok
 
     def _next_token(self) -> Token:
         actual_result: Token
         if len(self.tokens) == 0:
-            self.tokens += [Token(Tokens.BEGIN,
-                           loc=SourceLocation(
-                               Location(line=0, character=0),
-                               source=self.data
-                           ),
-                           value="")]
+            tok = Token(Tokens.BEGIN,
+                        loc=SourceLocation(
+                            Location(line=0, character=0),
+                            source=self.data
+                        ),
+                        value=None)
+            self.tokens.append(tok)
+            return tok
+
         if self.begin < len(self.data):
             best_result: Token = Token(Tokens.Unknown,
                                        loc=SourceLocation(
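
Below is a minimal usage sketch of the new interface; it is not part of
the diff. The `skip_begin` helper is hypothetical, and it assumes the
Token dataclass exposes its Tokens member as `tok.type`, which this hunk
does not show; itertools.islice bounds the loop so nothing is assumed
about how the lexer signals end of input:

    import itertools

    from compiler.lexer import Lexer, Tokens

    # Hypothetical filter: drop the synthetic BEGIN marker token. The
    # attribute holding the Tokens member is assumed to be `tok.type`;
    # the real field name is defined outside this hunk.
    def skip_begin(tok):
        return tok.type is not Tokens.BEGIN

    lexer = Lexer("let x = 1", token_filter=skip_begin)

    # Tokens are now produced lazily, one per __next__ call, instead of
    # the whole input being tokenized up front.
    for tok in itertools.islice(lexer, 5):
        print(tok)

Since Lexer now subclasses collections.abc.Iterator, the ABC supplies
__iter__ (returning self), so a single forward pass with a for-loop or
next() replaces the old Sequence-style random access through
__getitem__ and __len__.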