From 816f4e47b00e2c10712c002fa01ede8a3d4901e3 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Fri, 1 Dec 2023 21:44:39 +0100 Subject: [PATCH] Added simple error handling for lexer and parser --- lexer/lexer.go | 43 +++++++++++++++++++++++++------------------ parser/parser.go | 18 +++++++++--------- parser/parser_test.go | 10 +++++----- token/token.go | 14 ++++++++++---- 4 files changed, 49 insertions(+), 36 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 850dcb3..b1b0c76 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -12,11 +12,13 @@ type Lexer struct { position int // current position in the input (points to current char) readPosition int // current reading position in the input (after current char) ch byte // current char under examination + line int // current line number + column int // current column number } // NewLexer creates and initializes a new Lexer with the given input string. func NewLexer(input string) *Lexer { - l := &Lexer{input: input} + l := &Lexer{input: input, line: 1, column: 0} l.readChar() // Initialize the first character return l } @@ -30,35 +32,31 @@ func (l *Lexer) NextToken() token.Token { // Switch on the current character to determine the token type switch l.ch { case '{': - tok = token.NewToken(token.BEGIN_OBJECT, l.ch) + tok = token.NewToken(token.BEGIN_OBJECT, l.ch, l.line, l.column) case '}': - tok = token.NewToken(token.END_OBJECT, l.ch) + tok = token.NewToken(token.END_OBJECT, l.ch, l.line, l.column) case '[': - tok = token.NewToken(token.BEGIN_ARRAY, l.ch) + tok = token.NewToken(token.BEGIN_ARRAY, l.ch, l.line, l.column) case ']': - tok = token.NewToken(token.END_ARRAY, l.ch) + tok = token.NewToken(token.END_ARRAY, l.ch, l.line, l.column) case ':': - tok = token.NewToken(token.NAME_SEPARATOR, l.ch) + tok = token.NewToken(token.NAME_SEPARATOR, l.ch, l.line, l.column) case ',': - tok = token.NewToken(token.VALUE_SEPARATOR, l.ch) + tok = token.NewToken(token.VALUE_SEPARATOR, l.ch, l.line, l.column) case '"': - tok.Type = token.STRING - tok.Value = l.readString() // Read a string token + tok = token.NewTokenWithValue(token.STRING, l.readString(), l.line, l.column) case 0: - tok.Value = "" - tok.Type = token.EOF // End of file/input + tok = token.NewTokenWithValue(token.EOF, "", l.line, l.column) default: // Handle numbers and identifiers or mark as illegal if isDigit(l.ch) || l.ch == '-' { - tok.Value = l.readNumber() - tok.Type = token.NUMBER - return tok + return token.NewTokenWithValue(token.NUMBER, l.readNumber(), l.line, l.column) } else if isLetter(l.ch) { - tok.Value = l.readIdentifier() - tok.Type = token.LookupIdent(tok.Value) - return tok + s := l.readIdentifier() + t := token.LookupIdent(s) + return token.NewTokenWithValue(t, s, l.line, l.column) } else { - tok = token.NewToken(token.ILLEGAL, l.ch) + tok = token.NewToken(token.ILLEGAL, l.ch, l.line, l.column) } } @@ -73,6 +71,15 @@ func (l *Lexer) readChar() { } else { l.ch = l.input[l.readPosition] } + + // update line and column number used in error management + if l.ch == '\n' { + l.line++ + l.column = 0 + } else { + l.column++ + } + l.position = l.readPosition l.readPosition++ } diff --git a/parser/parser.go b/parser/parser.go index 1d646c0..72f63ca 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -52,7 +52,7 @@ func (p *Parser) parseObject() JsonObject { // Ensure the current token is the beginning of an object if !p.curTokenIs(token.BEGIN_OBJECT) { - p.addError("expected '{'") + p.addError(fmt.Sprintf("expected '{' at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value)) return nil } @@ -88,7 +88,7 @@ func (p *Parser) parseObject() JsonObject { // Handle comma separation for multiple key-value pairs if p.curTokenIs(token.VALUE_SEPARATOR) { if p.peekToken.Type == token.END_OBJECT { // No comma just before the end of the object - p.addError("No ',' before '}'") + p.addError(fmt.Sprintf("No ',' before '}' at line %d, column %d", p.curToken.Line, p.curToken.Column)) return nil } @@ -98,7 +98,7 @@ func (p *Parser) parseObject() JsonObject { // Ensure the end of the object is reached if !p.curTokenIs(token.END_OBJECT) { - p.addError("expected '}'") + p.addError(fmt.Sprintf("expected '}' at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value)) return nil } @@ -111,7 +111,7 @@ func (p *Parser) parseArray() JsonArray { // Ensure the current token is the beginning of an array if !p.curTokenIs(token.BEGIN_ARRAY) { - p.addError("expected '['") + p.addError(fmt.Sprintf("expected '[' at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value)) return nil } @@ -153,7 +153,7 @@ func (p *Parser) addError(msg string) { // parseObjectKey parses and returns the key of an object field. func (p *Parser) parseObjectKey() string { if p.curToken.Type != token.STRING { - p.addError("expected string for key") + p.addError(fmt.Sprintf("expected string for key at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value)) return "" } return p.curToken.Value @@ -175,7 +175,7 @@ func (p *Parser) parseValue() (interface{}, error) { case token.BEGIN_ARRAY: return p.parseArray(), nil default: - p.addError("unexpected token") + p.addError(fmt.Sprintf("unexpected token '%s' at line %d, column %d", p.curToken.Value, p.curToken.Line, p.curToken.Column)) return nil, errors.New("unexpected token") } } @@ -189,7 +189,7 @@ func (p *Parser) parseNumber() interface{} { // Parse as float val, err := strconv.ParseFloat(numStr, 64) if err != nil { - p.addError(fmt.Sprintf("could not parse %q as float", numStr)) + p.addError(fmt.Sprintf("could not parse %q as float at line %d, column %d", numStr, p.curToken.Line, p.curToken.Column)) return nil } return val @@ -198,7 +198,7 @@ func (p *Parser) parseNumber() interface{} { // Parse as integer val, err := strconv.ParseInt(numStr, 10, 64) if err != nil { - p.addError(fmt.Sprintf("could not parse %q as integer", numStr)) + p.addError(fmt.Sprintf("could not parse %q as integer at line %d, column %d", numStr, p.curToken.Line, p.curToken.Column)) return nil } return val @@ -215,7 +215,7 @@ func (p *Parser) expectPeek(t token.TokenType) bool { p.nextToken() return true } else { - p.addError(fmt.Sprintf("expected next token to be %v, got %v instead", t, p.peekToken.Type)) + p.addError(fmt.Sprintf("expected next token to be %v, got %v instead, at line %d, column %d", t, p.peekToken.Type, p.curToken.Line, p.curToken.Column)) return false } } diff --git a/parser/parser_test.go b/parser/parser_test.go index 7fcf47c..ae16446 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -62,7 +62,7 @@ func TestParseStep1Invalid(t *testing.T) { p := NewParser(l) parsed := p.Parse() - if len(p.errors) != 1 || p.errors[0] != "expected '{'" { + if len(p.errors) != 1 || p.errors[0] != "expected '{' at line 1, column 1, got ''" { t.Errorf("Not the expected error(s) during parsing, got %v", p.errors) } @@ -130,7 +130,7 @@ func TestParseStep2Invalid1(t *testing.T) { p := NewParser(l) parsed := p.Parse() - if len(p.errors) != 1 || p.errors[0] != "No ',' before '}'" { + if len(p.errors) != 1 || p.errors[0] != "No ',' before '}' at line 1, column 16" { t.Errorf("Not the expected error(s) during parsing, got %v", p.errors) } @@ -149,7 +149,7 @@ func TestParseStep2Invalid2(t *testing.T) { p := NewParser(l) parsed := p.Parse() - if len(p.errors) != 1 || p.errors[0] != "expected string for key" { + if len(p.errors) != 1 || p.errors[0] != "expected string for key at line 3, column 6, got 'key'" { t.Errorf("Not the expected error(s) during parsing, got %v", p.errors) } @@ -205,7 +205,7 @@ func TestParseStep3Invalid(t *testing.T) { p := NewParser(l) parsed := p.Parse() - if len(p.errors) != 1 || p.errors[0] != "unexpected token" { + if len(p.errors) != 1 || p.errors[0] != "unexpected token 'False' at line 3, column 16" { t.Errorf("Not the expected error(s) during parsing, got %v", p.errors) } @@ -296,7 +296,7 @@ func TestParseStep4Invalid(t *testing.T) { p := NewParser(l) parsed := p.Parse() - if len(p.errors) != 2 || p.errors[0] != "unexpected token" || p.errors[1] != "expected string for key" { + if len(p.errors) != 2 || p.errors[0] != "unexpected token ''' at line 7, column 13" || p.errors[1] != "expected string for key at line 7, column 18, got 'list'" { t.Errorf("Not the expected error(s) during parsing, got %v", p.errors) } diff --git a/token/token.go b/token/token.go index 4c1da34..2485d57 100644 --- a/token/token.go +++ b/token/token.go @@ -27,12 +27,18 @@ const ( ) type Token struct { - Type TokenType - Value string + Type TokenType + Value string + Line int + Column int } -func NewToken(tokenType TokenType, ch byte) Token { - return Token{Type: tokenType, Value: string(ch)} +func NewToken(tokenType TokenType, ch byte, l int, c int) Token { + return Token{Type: tokenType, Value: string(ch), Line: l, Column: c} +} + +func NewTokenWithValue(tokenType TokenType, val string, l int, c int) Token { + return Token{Type: tokenType, Value: val, Line: l, Column: c} } var keywords = map[string]TokenType{