lexer: convert to an iterator

This avoids requiring all the data to be read at once (will be useful for the REPL).
This commit is contained in:
Antoine Viallon 2024-01-11 00:54:02 +01:00
parent 530214b254
commit ff2435171c
Signed by: aviallon
GPG key ID: 186FC35EDEB25716

View file

@@ -3,6 +3,7 @@ from __future__ import annotations
import collections.abc
import enum
import re
import typing
from dataclasses import dataclass, field
from typing import cast
@@ -60,8 +61,8 @@ class Tokens(enum.Enum):
return self in [Tokens.KwLet]
class Lexer(collections.abc.Sequence):
def __init__(self, data: str):
class Lexer(collections.abc.Iterator):
def __init__(self, data: str, token_filter: typing.Callable[[Token], bool] | None = None):
self.data = data
self.tokens = []
self.length: int | None = None
@@ -69,32 +70,32 @@ class Lexer(collections.abc.Sequence):
self.end: int = 0
self.character: int = 0
self.line: int = 0
def __getitem__(self, index: int) -> Token:
while len(self) <= index + 1 and self.length is None:
self._next_token()
return self.tokens[index]
self.token_filter = token_filter
def __next__(self):
return self._next_token()
return self._filtered_next_token()
def __len__(self) -> int:
while self.length is None:
self._next_token()
def _filtered_next_token(self) -> Token:
tok = self._next_token()
if self.token_filter is not None:
while not self.token_filter(tok):
tok = self._next_token()
assert self.length is not None
return self.length
logger.debug(f"Returning token: {tok}")
return tok
def _next_token(self) -> Token:
actual_result: Token
if len(self.tokens) == 0:
self.tokens += [Token(Tokens.BEGIN,
tok = Token(Tokens.BEGIN,
loc=SourceLocation(
Location(line=0, character=0),
source=self.data
),
value="")]
value=None)
self.tokens.append(tok)
return tok
if self.begin < len(self.data):
best_result: Token = Token(Tokens.Unknown,
loc=SourceLocation(