lexer: fix EOF error when feeding partial input.

This commit is contained in:
Antoine Viallon 2024-01-11 18:48:30 +01:00
parent 9921d9fccd
commit c2751847d8
Signed by: aviallon
GPG key ID: 186FC35EDEB25716

View file

@ -62,8 +62,9 @@ class Tokens(enum.Enum):
class Lexer(collections.abc.Iterator):
def __init__(self, data: str, token_filter: typing.Callable[[Token], bool] | None = None):
self.data = data
def __init__(self, input_stream: typing.TextIO, token_filter: typing.Callable[[Token], bool] | None = None):
self.input = input_stream
self.data: str = ""
self.tokens = []
self.length: int | None = None
self.begin: int = 0
@ -96,7 +97,17 @@ class Lexer(collections.abc.Iterator):
self.tokens.append(tok)
return tok
if self.begin < len(self.data):
if self.tokens[-1].kind in [Tokens.BEGIN, Tokens.Newline]:
self.data += self.input.readline()
if self.begin == len(self.data):
eof_token = Token(Tokens.EOF, value=None, loc=SourceLocation(
Location(line=self.line, character=0),
))
self.tokens += [eof_token]
self.length = len(self.tokens)
return eof_token
elif self.begin < len(self.data):
best_result: Token = Token(Tokens.Unknown,
loc=SourceLocation(
Location(line=self.line, character=self.character),
@ -139,12 +150,5 @@ class Lexer(collections.abc.Iterator):
self.tokens += [best_result]
return best_result
elif self.begin == len(self.data):
eof_token = Token(Tokens.EOF, value=None, loc=SourceLocation(
Location(line=self.line, character=0), source=self.data
))
self.tokens += [eof_token]
self.length = len(self.tokens)
return eof_token
else:
raise IndexError("EOF already reached")