From 816f4e47b00e2c10712c002fa01ede8a3d4901e3 Mon Sep 17 00:00:00 2001
From: oabrivard <olivier@abrivard.fr>
Date: Fri, 1 Dec 2023 21:44:39 +0100
Subject: [PATCH] Add line and column positions to lexer tokens and parser error messages

---
 lexer/lexer.go        | 43 +++++++++++++++++++++++++------------------
 parser/parser.go      | 18 +++++++++---------
 parser/parser_test.go | 10 +++++-----
 token/token.go        | 14 ++++++++++----
 4 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/lexer/lexer.go b/lexer/lexer.go
index 850dcb3..b1b0c76 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -12,11 +12,13 @@ type Lexer struct {
 	position     int    // current position in the input (points to current char)
 	readPosition int    // current reading position in the input (after current char)
 	ch           byte   // current char under examination
+	line         int    // current line number
+	column       int    // current column number
 }
 
 // NewLexer creates and initializes a new Lexer with the given input string.
 func NewLexer(input string) *Lexer {
-	l := &Lexer{input: input}
+	l := &Lexer{input: input, line: 1, column: 0}
 	l.readChar() // Initialize the first character
 	return l
 }
@@ -30,35 +32,31 @@ func (l *Lexer) NextToken() token.Token {
 	// Switch on the current character to determine the token type
 	switch l.ch {
 	case '{':
-		tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
+		tok = token.NewToken(token.BEGIN_OBJECT, l.ch, l.line, l.column)
 	case '}':
-		tok = token.NewToken(token.END_OBJECT, l.ch)
+		tok = token.NewToken(token.END_OBJECT, l.ch, l.line, l.column)
 	case '[':
-		tok = token.NewToken(token.BEGIN_ARRAY, l.ch)
+		tok = token.NewToken(token.BEGIN_ARRAY, l.ch, l.line, l.column)
 	case ']':
-		tok = token.NewToken(token.END_ARRAY, l.ch)
+		tok = token.NewToken(token.END_ARRAY, l.ch, l.line, l.column)
 	case ':':
-		tok = token.NewToken(token.NAME_SEPARATOR, l.ch)
+		tok = token.NewToken(token.NAME_SEPARATOR, l.ch, l.line, l.column)
 	case ',':
-		tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
+		tok = token.NewToken(token.VALUE_SEPARATOR, l.ch, l.line, l.column)
 	case '"':
-		tok.Type = token.STRING
-		tok.Value = l.readString() // Read a string token
+		tok = token.NewTokenWithValue(token.STRING, l.readString(), l.line, l.column)
 	case 0:
-		tok.Value = ""
-		tok.Type = token.EOF // End of file/input
+		tok = token.NewTokenWithValue(token.EOF, "", l.line, l.column)
 	default:
 		// Handle numbers and identifiers or mark as illegal
 		if isDigit(l.ch) || l.ch == '-' {
-			tok.Value = l.readNumber()
-			tok.Type = token.NUMBER
-			return tok
+			return token.NewTokenWithValue(token.NUMBER, l.readNumber(), l.line, l.column)
 		} else if isLetter(l.ch) {
-			tok.Value = l.readIdentifier()
-			tok.Type = token.LookupIdent(tok.Value)
-			return tok
+			s := l.readIdentifier()
+			t := token.LookupIdent(s)
+			return token.NewTokenWithValue(t, s, l.line, l.column)
 		} else {
-			tok = token.NewToken(token.ILLEGAL, l.ch)
+			tok = token.NewToken(token.ILLEGAL, l.ch, l.line, l.column)
 		}
 	}
 
@@ -73,6 +71,15 @@ func (l *Lexer) readChar() {
 	} else {
 		l.ch = l.input[l.readPosition]
 	}
+
+	// update line and column number used in error management
+	if l.ch == '\n' {
+		l.line++
+		l.column = 0
+	} else {
+		l.column++
+	}
+
 	l.position = l.readPosition
 	l.readPosition++
 }
diff --git a/parser/parser.go b/parser/parser.go
index 1d646c0..72f63ca 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -52,7 +52,7 @@ func (p *Parser) parseObject() JsonObject {
 
 	// Ensure the current token is the beginning of an object
 	if !p.curTokenIs(token.BEGIN_OBJECT) {
-		p.addError("expected '{'")
+		p.addError(fmt.Sprintf("expected '{' at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value))
 		return nil
 	}
 
@@ -88,7 +88,7 @@ func (p *Parser) parseObject() JsonObject {
 		// Handle comma separation for multiple key-value pairs
 		if p.curTokenIs(token.VALUE_SEPARATOR) {
 			if p.peekToken.Type == token.END_OBJECT { // No comma just before the end of the object
-				p.addError("No ',' before '}'")
+				p.addError(fmt.Sprintf("No ',' before '}' at line %d, column %d", p.curToken.Line, p.curToken.Column))
 				return nil
 			}
 
@@ -98,7 +98,7 @@ func (p *Parser) parseObject() JsonObject {
 
 	// Ensure the end of the object is reached
 	if !p.curTokenIs(token.END_OBJECT) {
-		p.addError("expected '}'")
+		p.addError(fmt.Sprintf("expected '}' at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value))
 		return nil
 	}
 
@@ -111,7 +111,7 @@ func (p *Parser) parseArray() JsonArray {
 
 	// Ensure the current token is the beginning of an array
 	if !p.curTokenIs(token.BEGIN_ARRAY) {
-		p.addError("expected '['")
+		p.addError(fmt.Sprintf("expected '[' at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value))
 		return nil
 	}
 
@@ -153,7 +153,7 @@ func (p *Parser) addError(msg string) {
 // parseObjectKey parses and returns the key of an object field.
 func (p *Parser) parseObjectKey() string {
 	if p.curToken.Type != token.STRING {
-		p.addError("expected string for key")
+		p.addError(fmt.Sprintf("expected string for key at line %d, column %d, got '%s'", p.curToken.Line, p.curToken.Column, p.curToken.Value))
 		return ""
 	}
 	return p.curToken.Value
@@ -175,7 +175,7 @@ func (p *Parser) parseValue() (interface{}, error) {
 	case token.BEGIN_ARRAY:
 		return p.parseArray(), nil
 	default:
-		p.addError("unexpected token")
+		p.addError(fmt.Sprintf("unexpected token '%s' at line %d, column %d", p.curToken.Value, p.curToken.Line, p.curToken.Column))
 		return nil, errors.New("unexpected token")
 	}
 }
@@ -189,7 +189,7 @@ func (p *Parser) parseNumber() interface{} {
 		// Parse as float
 		val, err := strconv.ParseFloat(numStr, 64)
 		if err != nil {
-			p.addError(fmt.Sprintf("could not parse %q as float", numStr))
+			p.addError(fmt.Sprintf("could not parse %q as float at line %d, column %d", numStr, p.curToken.Line, p.curToken.Column))
 			return nil
 		}
 		return val
@@ -198,7 +198,7 @@ func (p *Parser) parseNumber() interface{} {
 	// Parse as integer
 	val, err := strconv.ParseInt(numStr, 10, 64)
 	if err != nil {
-		p.addError(fmt.Sprintf("could not parse %q as integer", numStr))
+		p.addError(fmt.Sprintf("could not parse %q as integer at line %d, column %d", numStr, p.curToken.Line, p.curToken.Column))
 		return nil
 	}
 	return val
@@ -215,7 +215,7 @@ func (p *Parser) expectPeek(t token.TokenType) bool {
 		p.nextToken()
 		return true
 	} else {
-		p.addError(fmt.Sprintf("expected next token to be %v, got %v instead", t, p.peekToken.Type))
+		p.addError(fmt.Sprintf("expected next token to be %v, got %v instead, at line %d, column %d", t, p.peekToken.Type, p.peekToken.Line, p.peekToken.Column)) // position of the offending (peek) token
 		return false
 	}
 }
diff --git a/parser/parser_test.go b/parser/parser_test.go
index 7fcf47c..ae16446 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -62,7 +62,7 @@ func TestParseStep1Invalid(t *testing.T) {
 	p := NewParser(l)
 	parsed := p.Parse()
 
-	if len(p.errors) != 1 || p.errors[0] != "expected '{'" {
+	if len(p.errors) != 1 || p.errors[0] != "expected '{' at line 1, column 1, got ''" {
 		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
 	}
 
@@ -130,7 +130,7 @@ func TestParseStep2Invalid1(t *testing.T) {
 	p := NewParser(l)
 	parsed := p.Parse()
 
-	if len(p.errors) != 1 || p.errors[0] != "No ',' before '}'" {
+	if len(p.errors) != 1 || p.errors[0] != "No ',' before '}' at line 1, column 16" {
 		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
 	}
 
@@ -149,7 +149,7 @@ func TestParseStep2Invalid2(t *testing.T) {
 	p := NewParser(l)
 	parsed := p.Parse()
 
-	if len(p.errors) != 1 || p.errors[0] != "expected string for key" {
+	if len(p.errors) != 1 || p.errors[0] != "expected string for key at line 3, column 6, got 'key'" {
 		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
 	}
 
@@ -205,7 +205,7 @@ func TestParseStep3Invalid(t *testing.T) {
 	p := NewParser(l)
 	parsed := p.Parse()
 
-	if len(p.errors) != 1 || p.errors[0] != "unexpected token" {
+	if len(p.errors) != 1 || p.errors[0] != "unexpected token 'False' at line 3, column 16" {
 		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
 	}
 
@@ -296,7 +296,7 @@ func TestParseStep4Invalid(t *testing.T) {
 	p := NewParser(l)
 	parsed := p.Parse()
 
-	if len(p.errors) != 2 || p.errors[0] != "unexpected token" || p.errors[1] != "expected string for key" {
+	if len(p.errors) != 2 || p.errors[0] != "unexpected token ''' at line 7, column 13" || p.errors[1] != "expected string for key at line 7, column 18, got 'list'" {
 		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
 	}
 
diff --git a/token/token.go b/token/token.go
index 4c1da34..2485d57 100644
--- a/token/token.go
+++ b/token/token.go
@@ -27,12 +27,18 @@ const (
 )
 
 type Token struct {
-	Type  TokenType
-	Value string
+	Type   TokenType
+	Value  string
+	Line   int
+	Column int
 }
 
-func NewToken(tokenType TokenType, ch byte) Token {
-	return Token{Type: tokenType, Value: string(ch)}
+func NewToken(tokenType TokenType, ch byte, l int, c int) Token {
+	return Token{Type: tokenType, Value: string(ch), Line: l, Column: c}
+}
+
+func NewTokenWithValue(tokenType TokenType, val string, l int, c int) Token {
+	return Token{Type: tokenType, Value: val, Line: l, Column: c}
 }
 
 var keywords = map[string]TokenType{