lexer: convert to an iterator

This avoids requiring all the data to be read at once (this will be useful for the REPL).
This commit is contained in:
Antoine Viallon 2024-01-11 00:54:02 +01:00
parent 530214b254
commit ff2435171c
Signed by: aviallon
GPG key ID: 186FC35EDEB25716

View file

@@ -3,6 +3,7 @@ from __future__ import annotations
import collections.abc
import enum
import re
import typing
from dataclasses import dataclass, field
from typing import cast
@@ -60,8 +61,8 @@ class Tokens(enum.Enum):
return self in [Tokens.KwLet]
-class Lexer(collections.abc.Sequence):
+class Lexer(collections.abc.Iterator):
-    def __init__(self, data: str):
+    def __init__(self, data: str, token_filter: typing.Callable[[Token], bool] | None = None):
self.data = data
self.tokens = []
self.length: int | None = None
@@ -69,32 +70,32 @@ class Lexer(collections.abc.Sequence):
self.end: int = 0
self.character: int = 0
self.line: int = 0
self.token_filter = token_filter
def __getitem__(self, index: int) -> Token:
while len(self) <= index + 1 and self.length is None:
self._next_token()
return self.tokens[index]
def __next__(self):
return self._next_token() return self._filtered_next_token()
def __len__(self) -> int: def _filtered_next_token(self) -> Token:
while self.length is None: tok = self._next_token()
self._next_token() if self.token_filter is not None:
while not self.token_filter(tok):
tok = self._next_token()
assert self.length is not None logger.debug(f"Returning token: {tok}")
return tok
return self.length
def _next_token(self) -> Token: def _next_token(self) -> Token:
actual_result: Token
if len(self.tokens) == 0:
self.tokens += [Token(Tokens.BEGIN, tok = Token(Tokens.BEGIN,
loc=SourceLocation( loc=SourceLocation(
Location(line=0, character=0), Location(line=0, character=0),
source=self.data source=self.data
), ),
value="")] value=None)
self.tokens.append(tok)
return tok
if self.begin < len(self.data):
best_result: Token = Token(Tokens.Unknown,
loc=SourceLocation(