From c2751847d85dc6d09e096d430951b3e466b68597 Mon Sep 17 00:00:00 2001
From: Antoine Viallon
Date: Thu, 11 Jan 2024 18:48:30 +0100
Subject: [PATCH] lexer: fix EOF error when feeding partial input to the lexer.

---
 compiler/lexer.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/compiler/lexer.py b/compiler/lexer.py
index a7ab620..3cd05b9 100644
--- a/compiler/lexer.py
+++ b/compiler/lexer.py
@@ -62,8 +62,9 @@ class Tokens(enum.Enum):
 
 
 class Lexer(collections.abc.Iterator):
-    def __init__(self, data: str, token_filter: typing.Callable[[Token], bool] | None = None):
-        self.data = data
+    def __init__(self, input_stream: typing.TextIO, token_filter: typing.Callable[[Token], bool] | None = None):
+        self.input = input_stream
+        self.data: str = ""
         self.tokens = []
         self.length: int | None = None
         self.begin: int = 0
@@ -96,7 +97,17 @@ class Lexer(collections.abc.Iterator):
             self.tokens.append(tok)
             return tok
 
-        if self.begin < len(self.data):
+        if self.tokens[-1].kind in [Tokens.BEGIN, Tokens.Newline]:
+            self.data += self.input.readline()
+
+        if self.begin == len(self.data):
+            eof_token = Token(Tokens.EOF, value=None, loc=SourceLocation(
+                Location(line=self.line, character=0),
+            ))
+            self.tokens += [eof_token]
+            self.length = len(self.tokens)
+            return eof_token
+        elif self.begin < len(self.data):
             best_result: Token = Token(Tokens.Unknown, loc=SourceLocation(
                 Location(line=self.line, character=self.character),
             )
@@ -139,12 +150,5 @@ class Lexer(collections.abc.Iterator):
 
             self.tokens += [best_result]
             return best_result
-        elif self.begin == len(self.data):
-            eof_token = Token(Tokens.EOF, value=None, loc=SourceLocation(
-                Location(line=self.line, character=0), source=self.data
-            ))
-            self.tokens += [eof_token]
-            self.length = len(self.tokens)
-            return eof_token
         else:
             raise IndexError("EOF already reached")