lexer: convert to an iterator
This avoids having to read all the data at once (which will be useful for the REPL).
This commit is contained in:
parent
530214b254
commit
ff2435171c
1 changed file with 21 additions and 20 deletions
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||||
import collections.abc
|
import collections.abc
|
||||||
import enum
|
import enum
|
||||||
import re
|
import re
|
||||||
|
import typing
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
|
|
@@ -60,8 +61,8 @@ class Tokens(enum.Enum):
|
||||||
return self in [Tokens.KwLet]
|
return self in [Tokens.KwLet]
|
||||||
|
|
||||||
|
|
||||||
class Lexer(collections.abc.Sequence):
|
class Lexer(collections.abc.Iterator):
|
||||||
def __init__(self, data: str):
|
def __init__(self, data: str, token_filter: typing.Callable[[Token], bool] | None = None):
|
||||||
self.data = data
|
self.data = data
|
||||||
self.tokens = []
|
self.tokens = []
|
||||||
self.length: int | None = None
|
self.length: int | None = None
|
||||||
|
|
@@ -69,32 +70,32 @@ class Lexer(collections.abc.Sequence):
|
||||||
self.end: int = 0
|
self.end: int = 0
|
||||||
self.character: int = 0
|
self.character: int = 0
|
||||||
self.line: int = 0
|
self.line: int = 0
|
||||||
|
self.token_filter = token_filter
|
||||||
def __getitem__(self, index: int) -> Token:
|
|
||||||
while len(self) <= index + 1 and self.length is None:
|
|
||||||
self._next_token()
|
|
||||||
return self.tokens[index]
|
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self):
|
||||||
return self._next_token()
|
return self._filtered_next_token()
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def _filtered_next_token(self) -> Token:
|
||||||
while self.length is None:
|
tok = self._next_token()
|
||||||
self._next_token()
|
if self.token_filter is not None:
|
||||||
|
while not self.token_filter(tok):
|
||||||
|
tok = self._next_token()
|
||||||
|
|
||||||
assert self.length is not None
|
logger.debug(f"Returning token: {tok}")
|
||||||
|
return tok
|
||||||
return self.length
|
|
||||||
|
|
||||||
def _next_token(self) -> Token:
|
def _next_token(self) -> Token:
|
||||||
actual_result: Token
|
actual_result: Token
|
||||||
if len(self.tokens) == 0:
|
if len(self.tokens) == 0:
|
||||||
self.tokens += [Token(Tokens.BEGIN,
|
tok = Token(Tokens.BEGIN,
|
||||||
loc=SourceLocation(
|
loc=SourceLocation(
|
||||||
Location(line=0, character=0),
|
Location(line=0, character=0),
|
||||||
source=self.data
|
source=self.data
|
||||||
),
|
),
|
||||||
value="")]
|
value=None)
|
||||||
|
self.tokens.append(tok)
|
||||||
|
return tok
|
||||||
|
|
||||||
if self.begin < len(self.data):
|
if self.begin < len(self.data):
|
||||||
best_result: Token = Token(Tokens.Unknown,
|
best_result: Token = Token(Tokens.Unknown,
|
||||||
loc=SourceLocation(
|
loc=SourceLocation(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue