Added comments for lexer and parser (comments were all generated by ChatGPT)

main
oabrivard 2 years ago
parent 4c2af09c99
commit b7502143c4

@ -1,27 +1,33 @@
// Package lexer defines the structure and methods for lexical analysis of JSON.
package lexer package lexer
import ( import (
"gitea.paas.celticinfo.fr/oabrivard/gojson/token" "gitea.paas.celticinfo.fr/oabrivard/gojson/token"
) )
// Lexer is a byte-oriented scanner over a JSON input string.
//
// It keeps the character currently under examination together with two
// offsets into input: position, which points at that character, and
// readPosition, which points just past it.
type Lexer struct {
	input        string // the string being scanned
	position     int    // current position in the input (points to current char)
	readPosition int    // current reading position in the input (after current char)
	ch           byte   // current char under examination
}
// NewLexer creates and initializes a new Lexer with the given input string.
func NewLexer(input string) *Lexer { func NewLexer(input string) *Lexer {
l := &Lexer{input: input} l := &Lexer{input: input}
l.readChar() l.readChar() // Initialize the first character
return l return l
} }
// NextToken reads the next token from the input and returns it.
func (l *Lexer) NextToken() token.Token { func (l *Lexer) NextToken() token.Token {
var tok token.Token var tok token.Token
l.skipWhitespace() l.skipWhitespace() // Skip any whitespace before the next token
// Switch on the current character to determine the token type
switch l.ch { switch l.ch {
case '{': case '{':
tok = token.NewToken(token.BEGIN_OBJECT, l.ch) tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
@ -37,11 +43,12 @@ func (l *Lexer) NextToken() token.Token {
tok = token.NewToken(token.VALUE_SEPARATOR, l.ch) tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
case '"': case '"':
tok.Type = token.STRING tok.Type = token.STRING
tok.Value = l.readString() tok.Value = l.readString() // Read a string token
case 0: case 0:
tok.Value = "" tok.Value = ""
tok.Type = token.EOF tok.Type = token.EOF // End of file/input
default: default:
// Handle numbers and identifiers or mark as illegal
if isDigit(l.ch) || l.ch == '-' { if isDigit(l.ch) || l.ch == '-' {
tok.Value = l.readNumber() tok.Value = l.readNumber()
tok.Type = token.NUMBER tok.Type = token.NUMBER
@ -55,13 +62,14 @@ func (l *Lexer) NextToken() token.Token {
} }
} }
l.readChar() l.readChar() // Move to the next character
return tok return tok
} }
// readChar advances to the next character in the input.
func (l *Lexer) readChar() { func (l *Lexer) readChar() {
if l.readPosition >= len(l.input) { if l.readPosition >= len(l.input) {
l.ch = 0 l.ch = 0 // End of input
} else { } else {
l.ch = l.input[l.readPosition] l.ch = l.input[l.readPosition]
} }
@ -69,12 +77,14 @@ func (l *Lexer) readChar() {
l.readPosition++ l.readPosition++
} }
// skipWhitespace skips over any whitespace characters in the input.
func (l *Lexer) skipWhitespace() { func (l *Lexer) skipWhitespace() {
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
l.readChar() l.readChar()
} }
} }
// readNumber reads a number (integer or floating point) from the input.
func (l *Lexer) readNumber() string { func (l *Lexer) readNumber() string {
position := l.position position := l.position
for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' { for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' {
@ -83,10 +93,12 @@ func (l *Lexer) readNumber() string {
return l.input[position:l.position] return l.input[position:l.position]
} }
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
// readString reads a string from the input, handling escaped quotes.
func (l *Lexer) readString() string { func (l *Lexer) readString() string {
position := l.position + 1 position := l.position + 1
for { for {
@ -98,6 +110,7 @@ func (l *Lexer) readString() string {
return l.input[position:l.position] return l.input[position:l.position]
} }
// readIdentifier reads an identifier from the input.
func (l *Lexer) readIdentifier() string { func (l *Lexer) readIdentifier() string {
position := l.position position := l.position
for isLetter(l.ch) { for isLetter(l.ch) {
@ -106,6 +119,7 @@ func (l *Lexer) readIdentifier() string {
return l.input[position:l.position] return l.input[position:l.position]
} }
// isLetter reports whether ch is an ASCII letter ('a'-'z' or 'A'-'Z') or an
// underscore.
func isLetter(ch byte) bool {
	return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_'
}

@ -1,3 +1,4 @@
// Package parser defines the structure and methods for parsing JSON.
package parser package parser
import ( import (
@ -10,15 +11,18 @@ import (
"gitea.paas.celticinfo.fr/oabrivard/gojson/token" "gitea.paas.celticinfo.fr/oabrivard/gojson/token"
) )
// Parser struct represents a parser with a lexer, current and peek tokens,
// and a slice to store parsing errors.
type Parser struct { type Parser struct {
lexer *lexer.Lexer lexer *lexer.Lexer // the lexer from which the parser receives tokens
curToken token.Token curToken token.Token // current token under examination
peekToken token.Token peekToken token.Token // next token in the input
errors []string errors []string // slice to store errors encountered during parsing
} }
// NewParser creates and initializes a new Parser with the given lexer.
func NewParser(l *lexer.Lexer) *Parser { func NewParser(l *lexer.Lexer) *Parser {
p := &Parser{lexer: l} p := &Parser{lexer: l}
// Initialize curToken and peekToken // Initialize curToken and peekToken
@ -27,24 +31,26 @@ func NewParser(l *lexer.Lexer) *Parser {
return p return p
} }
// nextToken advances both curToken and peekToken.
func (p *Parser) nextToken() { func (p *Parser) nextToken() {
p.curToken = p.peekToken p.curToken = p.peekToken
p.peekToken = p.lexer.NextToken() p.peekToken = p.lexer.NextToken()
} }
// JsonObject represents a JSON object as a map from member name to value.
type JsonObject map[string]interface{}

// JsonArray represents a JSON array as an ordered slice of values.
type JsonArray []interface{}
// Parse starts the parsing process and returns the top-level JSON object.
func (p *Parser) Parse() JsonObject { func (p *Parser) Parse() JsonObject {
return p.parseObject() return p.parseObject()
} }
// parseObject parses a JSON object from the token stream.
func (p *Parser) parseObject() JsonObject { func (p *Parser) parseObject() JsonObject {
object := make(JsonObject) object := make(JsonObject)
// Expect the current token to be TokenBeginObject // Ensure the current token is the beginning of an object
if !p.curTokenIs(token.BEGIN_OBJECT) { if !p.curTokenIs(token.BEGIN_OBJECT) {
p.addError("expected '{'") p.addError("expected '{'")
return nil return nil
@ -53,14 +59,14 @@ func (p *Parser) parseObject() JsonObject {
// Move to the next token // Move to the next token
p.nextToken() p.nextToken()
// Loop until we find an end object token // Loop until the end of the object is reached
for !p.curTokenIs(token.END_OBJECT) && !p.curTokenIs(token.EOF) { for !p.curTokenIs(token.END_OBJECT) && !p.curTokenIs(token.EOF) {
key := p.parseObjectKey() key := p.parseObjectKey()
if key == "" { if key == "" {
return nil return nil
} }
// Expect a name separator (:) // Ensure a name separator (:) follows the key
if !p.expectPeek(token.NAME_SEPARATOR) { if !p.expectPeek(token.NAME_SEPARATOR) {
return nil return nil
} }
@ -76,12 +82,12 @@ func (p *Parser) parseObject() JsonObject {
object[key] = value object[key] = value
// Move past the value, potentially to a comma or the end object // Move past the value
p.nextToken() p.nextToken()
// If we have a comma, the object continues // Handle comma separation for multiple key-value pairs
if p.curTokenIs(token.VALUE_SEPARATOR) { if p.curTokenIs(token.VALUE_SEPARATOR) {
if p.peekToken.Type == token.END_OBJECT { // no comma just before the end of the object if p.peekToken.Type == token.END_OBJECT { // No comma just before the end of the object
p.addError("No ',' before '}'") p.addError("No ',' before '}'")
return nil return nil
} }
@ -90,7 +96,7 @@ func (p *Parser) parseObject() JsonObject {
} }
} }
// Expect the end object token // Ensure the end of the object is reached
if !p.curTokenIs(token.END_OBJECT) { if !p.curTokenIs(token.END_OBJECT) {
p.addError("expected '}'") p.addError("expected '}'")
return nil return nil
@ -99,10 +105,11 @@ func (p *Parser) parseObject() JsonObject {
return object return object
} }
// parseArray parses a JSON array from the token stream.
func (p *Parser) parseArray() JsonArray { func (p *Parser) parseArray() JsonArray {
array := JsonArray{} array := JsonArray{}
// Expect the current token to be TokenBeginArray // Ensure the current token is the beginning of an array
if !p.curTokenIs(token.BEGIN_ARRAY) { if !p.curTokenIs(token.BEGIN_ARRAY) {
p.addError("expected '['") p.addError("expected '['")
return nil return nil
@ -111,7 +118,7 @@ func (p *Parser) parseArray() JsonArray {
// Move to the next token // Move to the next token
p.nextToken() p.nextToken()
// Loop until we find an end array token // Loop until the end of the array is reached
for !p.curTokenIs(token.END_ARRAY) { for !p.curTokenIs(token.END_ARRAY) {
// Parse the value // Parse the value
value, err := p.parseValue() value, err := p.parseValue()
@ -124,13 +131,13 @@ func (p *Parser) parseArray() JsonArray {
// Move past the value // Move past the value
p.nextToken() p.nextToken()
// If we have a value separator (comma), continue to the next value // Handle comma separation for multiple values
if p.curTokenIs(token.VALUE_SEPARATOR) { if p.curTokenIs(token.VALUE_SEPARATOR) {
p.nextToken() p.nextToken()
} }
} }
// Expect the end array token // Ensure the end of the array is reached
if !p.curTokenIs(token.END_ARRAY) { if !p.curTokenIs(token.END_ARRAY) {
return nil return nil
} }
@ -138,10 +145,12 @@ func (p *Parser) parseArray() JsonArray {
return array return array
} }
// addError appends an error message to the parser's errors slice.
func (p *Parser) addError(msg string) { func (p *Parser) addError(msg string) {
p.errors = append(p.errors, msg) p.errors = append(p.errors, msg)
} }
// parseObjectKey parses and returns the key of an object field.
func (p *Parser) parseObjectKey() string { func (p *Parser) parseObjectKey() string {
if p.curToken.Type != token.STRING { if p.curToken.Type != token.STRING {
p.addError("expected string for key") p.addError("expected string for key")
@ -150,6 +159,7 @@ func (p *Parser) parseObjectKey() string {
return p.curToken.Value return p.curToken.Value
} }
// parseValue parses a JSON value based on the current token type.
func (p *Parser) parseValue() (interface{}, error) { func (p *Parser) parseValue() (interface{}, error) {
switch p.curToken.Type { switch p.curToken.Type {
case token.STRING: case token.STRING:
@ -164,18 +174,17 @@ func (p *Parser) parseValue() (interface{}, error) {
return p.parseObject(), nil return p.parseObject(), nil
case token.BEGIN_ARRAY: case token.BEGIN_ARRAY:
return p.parseArray(), nil return p.parseArray(), nil
// ... other cases
default: default:
p.addError("unexpected token") p.addError("unexpected token")
return nil, errors.New("unexpected token") return nil, errors.New("unexpected token")
} }
} }
// parseNumber parses a number token into an appropriate Go numeric type.
func (p *Parser) parseNumber() interface{} { func (p *Parser) parseNumber() interface{} {
// Assuming the number is in a string format in the token
numStr := p.curToken.Value numStr := p.curToken.Value
// Check if the number is an integer or a float // Check for float or integer representation
if strings.Contains(numStr, ".") || strings.ContainsAny(numStr, "eE") { if strings.Contains(numStr, ".") || strings.ContainsAny(numStr, "eE") {
// Parse as float // Parse as float
val, err := strconv.ParseFloat(numStr, 64) val, err := strconv.ParseFloat(numStr, 64)
@ -195,10 +204,12 @@ func (p *Parser) parseNumber() interface{} {
return val return val
} }
// parseBoolean returns a boolean value based on the current token.
func (p *Parser) parseBoolean() bool { func (p *Parser) parseBoolean() bool {
return p.curToken.Type == token.TRUE return p.curToken.Type == token.TRUE
} }
// expectPeek checks if the next token is of the expected type.
func (p *Parser) expectPeek(t token.TokenType) bool { func (p *Parser) expectPeek(t token.TokenType) bool {
if p.peekToken.Type == t { if p.peekToken.Type == t {
p.nextToken() p.nextToken()
@ -209,14 +220,7 @@ func (p *Parser) expectPeek(t token.TokenType) bool {
} }
} }
// curTokenIs checks if the current token is of a specific type.
func (p *Parser) curTokenIs(t token.TokenType) bool { func (p *Parser) curTokenIs(t token.TokenType) bool {
return p.curToken.Type == t return p.curToken.Type == t
} }
/*
func (p *Parser) parseArray() *JsonArray {
// Implementation for parsing an array
}
*/
// ... other parse methods for different types

@ -1 +0,0 @@
{"key": "value",}

@ -1,4 +0,0 @@
{
"key": "value",
key2: "value"
}

@ -1 +0,0 @@
{"key": "value"}

@ -1,4 +0,0 @@
{
"key": "value",
"key2": "value"
}

@ -1,7 +0,0 @@
{
"key1": true,
"key2": False,
"key3": null,
"key4": "value",
"key5": 101
}

@ -1,7 +0,0 @@
{
"key1": true,
"key2": false,
"key3": null,
"key4": "value",
"key5": 101
}

@ -1,8 +0,0 @@
{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ['list value']
}

@ -1,6 +0,0 @@
{
"key": "value",
"key-n": 101,
"key-o": {},
"key-l": []
}

@ -1,8 +0,0 @@
{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ["list value"]
}
Loading…
Cancel
Save