From 530214b25490b598f232517b3923f476ac570bae Mon Sep 17 00:00:00 2001
From: Antoine Viallon
Date: Thu, 11 Jan 2024 00:52:17 +0100
Subject: [PATCH] parser: only require an iterator of tokens instead of a list

---
 compiler/parser.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/compiler/parser.py b/compiler/parser.py
index f195dbb..89aef0c 100644
--- a/compiler/parser.py
+++ b/compiler/parser.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import collections.abc
 from typing import Callable
 
 from .errors import CompilationError, UnexpectedTokenError
@@ -13,20 +14,40 @@ tracer = Tracer(logger, level=LogLevel.Debug)
 
 class Parser:
-    def __init__(self, tokens: list[Token]):
+    def __init__(self, tokens: collections.abc.Iterator[Token]):
         self.tokens = tokens
+        self._token_cache: list[Token] = []
+        self._EOF = False
         self.pos = 0
         self._last_accepted_token: Tokens | None = None
 
     @property
     def token(self) -> Token:
-        if self.pos >= len(self.tokens):
-            return Token(kind=Tokens.EOF)
-        return self.tokens[self.pos]
+        if self._EOF:
+            return self._token_cache[-1]
+
+        while len(self._token_cache) <= self.pos:
+            tok = next(self.tokens)
+            self._token_cache.append(tok)
+            if tok.kind == Tokens.EOF:
+                self._EOF = True
+                return tok
+
+        return self._token_cache[self.pos]
+
+    def _fetch_until(self, desired_length: int) -> int:
+        while len(self._token_cache) <= desired_length and not self._EOF:
+            tok = next(self.tokens)
+            self._token_cache.append(tok)
+            if tok.kind == Tokens.EOF:
+                self._EOF = True
+                break
+
+        return len(self._token_cache)
 
     @property
     def prev_token(self) -> Token:
-        return self.tokens[self.pos - 1]
+        return self._token_cache[self.pos - 1]
 
     def next_symbol(self):
         self.pos += 1
 
@@ -48,10 +69,11 @@ class Parser:
         return False
 
     def peek_several(self, *tokens_types: Tokens) -> False | list[Token]:
-        if self.pos + len(tokens_types) >= len(self.tokens):
+        desired_pos = self.pos + len(tokens_types)
+        if desired_pos >= self._fetch_until(desired_pos + 1):
             return False
 
-        toks = self.tokens[self.pos:self.pos + len(tokens_types)]
+        toks = self._token_cache[self.pos:self.pos + len(tokens_types)]
         for i, token in enumerate(toks):
             if token.kind != tokens_types[i]:
                 return False