Added comments for lexer and parser (comments were all generated by ChatGPT)

2 years ago · b7502143c4
parent 4c2af09c99
commit b7502143c4
13 changed files with 56 additions and 85 deletions
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@ -1,27 +1,33 @@
 // Package lexer defines the structure and methods for lexical analysis of JSON.
 package lexer
 import (
 	"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
 )
 // Lexer struct represents a lexical analyzer with its input, current position,
 // next reading position, and current character.
 type Lexer struct {
-	input        string
+	input        string // the string being scanned
-	position     int
+	position     int    // current position in the input (points to current char)
-	readPosition int
+	readPosition int    // current reading position in the input (after current char)
-	ch           byte
+	ch           byte   // current char under examination
 }
 // NewLexer creates and initializes a new Lexer with the given input string.
 func NewLexer(input string) *Lexer {
 	l := &Lexer{input: input}
-	l.readChar()
+	l.readChar() // Initialize the first character
 	return l
 }
 // NextToken reads the next token from the input and returns it.
 func (l *Lexer) NextToken() token.Token {
 	var tok token.Token
-	l.skipWhitespace()
+	l.skipWhitespace() // Skip any whitespace before the next token
 	// Switch on the current character to determine the token type
 	switch l.ch {
 	case '{':
 		tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
@ -37,11 +43,12 @@ func (l *Lexer) NextToken() token.Token {
 		tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
 	case '"':
 		tok.Type = token.STRING
-		tok.Value = l.readString()
+		tok.Value = l.readString() // Read a string token
 	case 0:
 		tok.Value = ""
-		tok.Type = token.EOF
+		tok.Type = token.EOF // End of file/input
 	default:
 		// Handle numbers and identifiers or mark as illegal
 		if isDigit(l.ch) || l.ch == '-' {
 			tok.Value = l.readNumber()
 			tok.Type = token.NUMBER
@ -55,13 +62,14 @@ func (l *Lexer) NextToken() token.Token {
 		}
 	}
-	l.readChar()
+	l.readChar() // Move to the next character
 	return tok
 }
 // readChar advances to the next character in the input.
 func (l *Lexer) readChar() {
 	if l.readPosition >= len(l.input) {
-		l.ch = 0
+		l.ch = 0 // End of input
 	} else {
 		l.ch = l.input[l.readPosition]
 	}
@ -69,12 +77,14 @@ func (l *Lexer) readChar() {
 	l.readPosition++
 }
 // skipWhitespace consumes consecutive space, tab, line-feed and
 // carriage-return characters, leaving l.ch on the first character that is
 // none of these.
 func (l *Lexer) skipWhitespace() {
 	for {
 		switch l.ch {
 		case ' ', '\t', '\n', '\r':
 			// Still inside the whitespace run: step to the next character.
 			l.readChar()
 		default:
 			return
 		}
 	}
 }
 // readNumber reads a number (integer or floating point) from the input.
 func (l *Lexer) readNumber() string {
 	position := l.position
 	for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' {
@ -83,10 +93,12 @@ func (l *Lexer) readNumber() string {
 	return l.input[position:l.position]
 }
 // isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
 func isDigit(ch byte) bool {
 	if ch < '0' {
 		return false
 	}
 	return ch <= '9'
 }
 // readString reads a string from the input, handling escaped quotes.
 func (l *Lexer) readString() string {
 	position := l.position + 1
 	for {
@ -98,6 +110,7 @@ func (l *Lexer) readString() string {
 	return l.input[position:l.position]
 }
 // readIdentifier reads an identifier from the input.
 func (l *Lexer) readIdentifier() string {
 	position := l.position
 	for isLetter(l.ch) {
@ -106,6 +119,7 @@ func (l *Lexer) readIdentifier() string {
 	return l.input[position:l.position]
 }
 // isLetter reports whether ch is an ASCII letter (a-z or A-Z) or an
 // underscore.
 func isLetter(ch byte) bool {
 	switch {
 	case ch >= 'a' && ch <= 'z':
 		return true
 	case ch >= 'A' && ch <= 'Z':
 		return true
 	default:
 		return ch == '_'
 	}
 }
--- a/parser/parser.go
+++ b/parser/parser.go
@ -1,3 +1,4 @@
 // Package parser defines the structure and methods for parsing JSON.
 package parser
 import (
@ -10,15 +11,18 @@ import (
 	"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
 )
 // Parser struct represents a parser with a lexer, current and peek tokens,
 // and a slice to store parsing errors.
 type Parser struct {
-	lexer *lexer.Lexer
+	lexer *lexer.Lexer // the lexer from which the parser receives tokens
-	curToken  token.Token
+	curToken  token.Token // current token under examination
-	peekToken token.Token
+	peekToken token.Token // next token in the input
-	errors []string
+	errors []string // slice to store errors encountered during parsing
 }
 // NewParser creates and initializes a new Parser with the given lexer.
 func NewParser(l *lexer.Lexer) *Parser {
 	p := &Parser{lexer: l}
 	// Initialize curToken and peekToken
@ -27,24 +31,26 @@ func NewParser(l *lexer.Lexer) *Parser {
 	return p
 }
 // nextToken shifts the token window forward by one: the previous peek token
 // becomes the current token and a fresh token is pulled from the lexer to
 // serve as the new peek token.
 func (p *Parser) nextToken() {
 	source := p.lexer
 	p.curToken = p.peekToken
 	p.peekToken = source.NextToken()
 }
-// Methods to parse JSON structure
+// JsonObject and JsonArray are types to represent JSON objects and arrays, respectively.
 // JsonObject is the map type returned by parseObject; each parsed key is
 // stored with its parsed value.
 type JsonObject map[string]interface{}
 // JsonArray is the slice type returned by parseArray.
 type JsonArray []interface{}
 // Parse parses the token stream as a single top-level JSON object by
 // delegating to parseObject and returns its result. The result is nil when
 // parseObject gives up, for example when the first token is not '{'.
 func (p *Parser) Parse() JsonObject {
 	root := p.parseObject()
 	return root
 }
 // parseObject parses a JSON object from the token stream.
 func (p *Parser) parseObject() JsonObject {
 	object := make(JsonObject)
-	// Expect the current token to be TokenBeginObject
+	// Ensure the current token is the beginning of an object
 	if !p.curTokenIs(token.BEGIN_OBJECT) {
 		p.addError("expected '{'")
 		return nil
@ -53,14 +59,14 @@ func (p *Parser) parseObject() JsonObject {
 	// Move to the next token
 	p.nextToken()
-	// Loop until we find an end object token
+	// Loop until the end of the object is reached
 	for !p.curTokenIs(token.END_OBJECT) && !p.curTokenIs(token.EOF) {
 		key := p.parseObjectKey()
 		if key == "" {
 			return nil
 		}
-		// Expect a name separator (:)
+		// Ensure a name separator (:) follows the key
 		if !p.expectPeek(token.NAME_SEPARATOR) {
 			return nil
 		}
@ -76,12 +82,12 @@ func (p *Parser) parseObject() JsonObject {
 		object[key] = value
-		// Move past the value, potentially to a comma or the end object
+		// Move past the value
 		p.nextToken()
-		// If we have a comma, the object continues
+		// Handle comma separation for multiple key-value pairs
 		if p.curTokenIs(token.VALUE_SEPARATOR) {
-			if p.peekToken.Type == token.END_OBJECT { // no comma just before the end of the object
+			if p.peekToken.Type == token.END_OBJECT { // No comma just before the end of the object
 				p.addError("No ',' before '}'")
 				return nil
 			}
@ -90,7 +96,7 @@ func (p *Parser) parseObject() JsonObject {
 		}
 	}
-	// Expect the end object token
+	// Ensure the end of the object is reached
 	if !p.curTokenIs(token.END_OBJECT) {
 		p.addError("expected '}'")
 		return nil
@ -99,10 +105,11 @@ func (p *Parser) parseObject() JsonObject {
 	return object
 }
 // parseArray parses a JSON array from the token stream.
 func (p *Parser) parseArray() JsonArray {
 	array := JsonArray{}
-	// Expect the current token to be TokenBeginArray
+	// Ensure the current token is the beginning of an array
 	if !p.curTokenIs(token.BEGIN_ARRAY) {
 		p.addError("expected '['")
 		return nil
@ -111,7 +118,7 @@ func (p *Parser) parseArray() JsonArray {
 	// Move to the next token
 	p.nextToken()
-	// Loop until we find an end array token
+	// Loop until the end of the array is reached
 	for !p.curTokenIs(token.END_ARRAY) {
 		// Parse the value
 		value, err := p.parseValue()
@ -124,13 +131,13 @@ func (p *Parser) parseArray() JsonArray {
 		// Move past the value
 		p.nextToken()
-		// If we have a value separator (comma), continue to the next value
+		// Handle comma separation for multiple values
 		if p.curTokenIs(token.VALUE_SEPARATOR) {
 			p.nextToken()
 		}
 	}
-	// Expect the end array token
+	// Ensure the end of the array is reached
 	if !p.curTokenIs(token.END_ARRAY) {
 		return nil
 	}
@ -138,10 +145,12 @@ func (p *Parser) parseArray() JsonArray {
 	return array
 }
 // addError records msg at the end of the parser's list of error messages.
 func (p *Parser) addError(msg string) {
 	recorded := append(p.errors, msg)
 	p.errors = recorded
 }
 // parseObjectKey parses and returns the key of an object field.
 func (p *Parser) parseObjectKey() string {
 	if p.curToken.Type != token.STRING {
 		p.addError("expected string for key")
@ -150,6 +159,7 @@ func (p *Parser) parseObjectKey() string {
 	return p.curToken.Value
 }
 // parseValue parses a JSON value based on the current token type.
 func (p *Parser) parseValue() (interface{}, error) {
 	switch p.curToken.Type {
 	case token.STRING:
@ -164,18 +174,17 @@ func (p *Parser) parseValue() (interface{}, error) {
 		return p.parseObject(), nil
 	case token.BEGIN_ARRAY:
 		return p.parseArray(), nil
 	// ... other cases
 	default:
 		p.addError("unexpected token")
 		return nil, errors.New("unexpected token")
 	}
 }
 // parseNumber parses a number token into an appropriate Go numeric type.
 func (p *Parser) parseNumber() interface{} {
 	// Assuming the number is in a string format in the token
 	numStr := p.curToken.Value
-	// Check if the number is an integer or a float
+	// Check for float or integer representation
 	if strings.Contains(numStr, ".") || strings.ContainsAny(numStr, "eE") {
 		// Parse as float
 		val, err := strconv.ParseFloat(numStr, 64)
@ -195,10 +204,12 @@ func (p *Parser) parseNumber() interface{} {
 	return val
 }
 // parseBoolean converts the current token to a Go bool: true when the token
 // type is token.TRUE, false for every other token type.
 func (p *Parser) parseBoolean() bool {
 	kind := p.curToken.Type
 	return kind == token.TRUE
 }
 // expectPeek checks if the next token is of the expected type.
 func (p *Parser) expectPeek(t token.TokenType) bool {
 	if p.peekToken.Type == t {
 		p.nextToken()
@ -209,14 +220,7 @@ func (p *Parser) expectPeek(t token.TokenType) bool {
 	}
 }
 // curTokenIs reports whether the type of the current token equals t.
 func (p *Parser) curTokenIs(t token.TokenType) bool {
 	current := p.curToken.Type
 	return current == t
 }
 /*
 func (p *Parser) parseArray() *JsonArray {
 	// Implementation for parsing an array
 }
 */
 // ... other parse methods for different types
--- a/tests/step1/invalid.json
+++ b/tests/step1/invalid.json
--- a/tests/step1/valid.json
+++ b/tests/step1/valid.json
@ -1 +0,0 @@
 {}
--- a/tests/step2/invalid.json
+++ b/tests/step2/invalid.json
@ -1 +0,0 @@
 {"key": "value",}
--- a/tests/step2/invalid2.json
+++ b/tests/step2/invalid2.json
@ -1,4 +0,0 @@
 {
  "key": "value",
  key2: "value"
 }
--- a/tests/step2/valid.json
+++ b/tests/step2/valid.json
@ -1 +0,0 @@
 {"key": "value"}
--- a/tests/step2/valid2.json
+++ b/tests/step2/valid2.json
@ -1,4 +0,0 @@
 {
  "key": "value",
  "key2": "value"
 }
--- a/tests/step3/invalid.json
+++ b/tests/step3/invalid.json
@ -1,7 +0,0 @@
 {
  "key1": true,
  "key2": False,
  "key3": null,
  "key4": "value",
  "key5": 101
 }
--- a/tests/step3/valid.json
+++ b/tests/step3/valid.json
@ -1,7 +0,0 @@
 {
  "key1": true,
  "key2": false,
  "key3": null,
  "key4": "value",
  "key5": 101
 }
--- a/tests/step4/invalid.json
+++ b/tests/step4/invalid.json
@ -1,8 +0,0 @@
 {
  "key": "value",
  "key-n": 101,
  "key-o": {
    "inner key": "inner value"
  },
  "key-l": ['list value']
 }
--- a/tests/step4/valid.json
+++ b/tests/step4/valid.json
@ -1,6 +0,0 @@
 {
  "key": "value",
  "key-n": 101,
  "key-o": {},
  "key-l": []
 }
--- a/tests/step4/valid2.json
+++ b/tests/step4/valid2.json
@ -1,8 +0,0 @@
 {
  "key": "value",
  "key-n": 101,
  "key-o": {
    "inner key": "inner value"
  },
  "key-l": ["list value"]
 }