Added comments for lexer and parser (comments were all generated by ChatGPT)

main
oabrivard 2 years ago
parent 4c2af09c99
commit b7502143c4

@ -1,27 +1,33 @@
// Package lexer defines the structure and methods for lexical analysis of JSON.
package lexer
import (
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
)
// Lexer is a byte-oriented lexical analyzer over a JSON input string.
//
// It tracks the byte currently being examined together with two cursors into
// the input: the index of that byte and the index of the byte that will be
// read next.
type Lexer struct {
	input        string // the string being scanned
	position     int    // current position in the input (points to current char)
	readPosition int    // current reading position in the input (after current char)
	ch           byte   // current char under examination
}
// NewLexer returns a Lexer over input, primed so that the first byte of the
// input has already been loaded as the current character.
func NewLexer(input string) *Lexer {
	l := &Lexer{input: input}
	// Prime exactly once: a second call here would advance past the first
	// byte of the input before any token has been produced.
	l.readChar()
	return l
}
// NextToken reads the next token from the input and returns it.
func (l *Lexer) NextToken() token.Token {
var tok token.Token
l.skipWhitespace()
l.skipWhitespace() // Skip any whitespace before the next token
// Switch on the current character to determine the token type
switch l.ch {
case '{':
tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
@ -37,11 +43,12 @@ func (l *Lexer) NextToken() token.Token {
tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
case '"':
tok.Type = token.STRING
tok.Value = l.readString()
tok.Value = l.readString() // Read a string token
case 0:
tok.Value = ""
tok.Type = token.EOF
tok.Type = token.EOF // End of file/input
default:
// Handle numbers and identifiers or mark as illegal
if isDigit(l.ch) || l.ch == '-' {
tok.Value = l.readNumber()
tok.Type = token.NUMBER
@ -55,13 +62,14 @@ func (l *Lexer) NextToken() token.Token {
}
}
l.readChar()
l.readChar() // Move to the next character
return tok
}
// readChar advances to the next character in the input.
func (l *Lexer) readChar() {
if l.readPosition >= len(l.input) {
l.ch = 0
l.ch = 0 // End of input
} else {
l.ch = l.input[l.readPosition]
}
@ -69,12 +77,14 @@ func (l *Lexer) readChar() {
l.readPosition++
}
// skipWhitespace advances the lexer while the current character is a space,
// horizontal tab, line feed or carriage return, stopping on the first
// character that is none of those.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
// readNumber reads a number (integer or floating point) from the input.
func (l *Lexer) readNumber() string {
position := l.position
for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' {
@ -83,10 +93,12 @@ func (l *Lexer) readNumber() string {
return l.input[position:l.position]
}
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
// readString reads a string from the input, handling escaped quotes.
func (l *Lexer) readString() string {
position := l.position + 1
for {
@ -98,6 +110,7 @@ func (l *Lexer) readString() string {
return l.input[position:l.position]
}
// readIdentifier reads an identifier from the input.
func (l *Lexer) readIdentifier() string {
position := l.position
for isLetter(l.ch) {
@ -106,6 +119,7 @@ func (l *Lexer) readIdentifier() string {
return l.input[position:l.position]
}
// isLetter reports whether ch is an ASCII letter (either case) or an
// underscore.
func isLetter(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return false
	}
}

@ -1,3 +1,4 @@
// Package parser defines the structure and methods for parsing JSON.
package parser
import (
@ -10,15 +11,18 @@ import (
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
)
// Parser consumes tokens from a lexer using one token of lookahead and
// accumulates the error messages it records along the way.
type Parser struct {
	lexer *lexer.Lexer // the lexer from which the parser receives tokens

	curToken  token.Token // current token under examination
	peekToken token.Token // next token in the input

	errors []string // slice to store errors encountered during parsing
}
// NewParser creates and initializes a new Parser with the given lexer.
func NewParser(l *lexer.Lexer) *Parser {
p := &Parser{lexer: l}
// Initialize curToken and peekToken
@ -27,24 +31,26 @@ func NewParser(l *lexer.Lexer) *Parser {
return p
}
// nextToken shifts the lookahead window forward by one token: the former
// peek token becomes the current token and a fresh token is pulled from the
// lexer to become the new peek token.
func (p *Parser) nextToken() {
	upcoming := p.lexer.NextToken()
	p.curToken = p.peekToken
	p.peekToken = upcoming
}
// Methods to parse JSON structure
// Go-side representations of the two JSON container kinds.
type (
	// JsonObject is a JSON object: a map from string keys to arbitrary values.
	JsonObject map[string]interface{}
	// JsonArray is a JSON array: an ordered list of arbitrary values.
	JsonArray []interface{}
)
// Parse is the parser's entry point. It parses the token stream as a single
// JSON object and returns whatever parseObject produces for it.
func (p *Parser) Parse() JsonObject {
	root := p.parseObject()
	return root
}
// parseObject parses a JSON object from the token stream.
func (p *Parser) parseObject() JsonObject {
object := make(JsonObject)
// Expect the current token to be TokenBeginObject
// Ensure the current token is the beginning of an object
if !p.curTokenIs(token.BEGIN_OBJECT) {
p.addError("expected '{'")
return nil
@ -53,14 +59,14 @@ func (p *Parser) parseObject() JsonObject {
// Move to the next token
p.nextToken()
// Loop until we find an end object token
// Loop until the end of the object is reached
for !p.curTokenIs(token.END_OBJECT) && !p.curTokenIs(token.EOF) {
key := p.parseObjectKey()
if key == "" {
return nil
}
// Expect a name separator (:)
// Ensure a name separator (:) follows the key
if !p.expectPeek(token.NAME_SEPARATOR) {
return nil
}
@ -76,12 +82,12 @@ func (p *Parser) parseObject() JsonObject {
object[key] = value
// Move past the value, potentially to a comma or the end object
// Move past the value
p.nextToken()
// If we have a comma, the object continues
// Handle comma separation for multiple key-value pairs
if p.curTokenIs(token.VALUE_SEPARATOR) {
if p.peekToken.Type == token.END_OBJECT { // no comma just before the end of the object
if p.peekToken.Type == token.END_OBJECT { // No comma just before the end of the object
p.addError("No ',' before '}'")
return nil
}
@ -90,7 +96,7 @@ func (p *Parser) parseObject() JsonObject {
}
}
// Expect the end object token
// Ensure the end of the object is reached
if !p.curTokenIs(token.END_OBJECT) {
p.addError("expected '}'")
return nil
@ -99,10 +105,11 @@ func (p *Parser) parseObject() JsonObject {
return object
}
// parseArray parses a JSON array from the token stream.
func (p *Parser) parseArray() JsonArray {
array := JsonArray{}
// Expect the current token to be TokenBeginArray
// Ensure the current token is the beginning of an array
if !p.curTokenIs(token.BEGIN_ARRAY) {
p.addError("expected '['")
return nil
@ -111,7 +118,7 @@ func (p *Parser) parseArray() JsonArray {
// Move to the next token
p.nextToken()
// Loop until we find an end array token
// Loop until the end of the array is reached
for !p.curTokenIs(token.END_ARRAY) {
// Parse the value
value, err := p.parseValue()
@ -124,13 +131,13 @@ func (p *Parser) parseArray() JsonArray {
// Move past the value
p.nextToken()
// If we have a value separator (comma), continue to the next value
// Handle comma separation for multiple values
if p.curTokenIs(token.VALUE_SEPARATOR) {
p.nextToken()
}
}
// Expect the end array token
// Ensure the end of the array is reached
if !p.curTokenIs(token.END_ARRAY) {
return nil
}
@ -138,10 +145,12 @@ func (p *Parser) parseArray() JsonArray {
return array
}
// addError records msg at the end of the parser's list of error messages.
func (p *Parser) addError(msg string) {
	recorded := append(p.errors, msg)
	p.errors = recorded
}
// parseObjectKey parses and returns the key of an object field.
func (p *Parser) parseObjectKey() string {
if p.curToken.Type != token.STRING {
p.addError("expected string for key")
@ -150,6 +159,7 @@ func (p *Parser) parseObjectKey() string {
return p.curToken.Value
}
// parseValue parses a JSON value based on the current token type.
func (p *Parser) parseValue() (interface{}, error) {
switch p.curToken.Type {
case token.STRING:
@ -164,18 +174,17 @@ func (p *Parser) parseValue() (interface{}, error) {
return p.parseObject(), nil
case token.BEGIN_ARRAY:
return p.parseArray(), nil
// ... other cases
default:
p.addError("unexpected token")
return nil, errors.New("unexpected token")
}
}
// parseNumber parses a number token into an appropriate Go numeric type.
func (p *Parser) parseNumber() interface{} {
// Assuming the number is in a string format in the token
numStr := p.curToken.Value
// Check if the number is an integer or a float
// Check for float or integer representation
if strings.Contains(numStr, ".") || strings.ContainsAny(numStr, "eE") {
// Parse as float
val, err := strconv.ParseFloat(numStr, 64)
@ -195,10 +204,12 @@ func (p *Parser) parseNumber() interface{} {
return val
}
// parseBoolean converts the current token to a Go bool: true when the token
// type is token.TRUE, false for any other token type.
func (p *Parser) parseBoolean() bool {
	switch p.curToken.Type {
	case token.TRUE:
		return true
	default:
		return false
	}
}
// expectPeek checks if the next token is of the expected type.
func (p *Parser) expectPeek(t token.TokenType) bool {
if p.peekToken.Type == t {
p.nextToken()
@ -209,14 +220,7 @@ func (p *Parser) expectPeek(t token.TokenType) bool {
}
}
// curTokenIs reports whether the type of the current token equals t.
func (p *Parser) curTokenIs(t token.TokenType) bool {
	current := p.curToken.Type
	return current == t
}
/*
func (p *Parser) parseArray() *JsonArray {
// Implementation for parsing an array
}
*/
// ... other parse methods for different types

@ -1 +0,0 @@
{"key": "value",}

@ -1,4 +0,0 @@
{
"key": "value",
key2: "value"
}

@ -1 +0,0 @@
{"key": "value"}

@ -1,4 +0,0 @@
{
"key": "value",
"key2": "value"
}

@ -1,7 +0,0 @@
{
"key1": true,
"key2": False,
"key3": null,
"key4": "value",
"key5": 101
}

@ -1,7 +0,0 @@
{
"key1": true,
"key2": false,
"key3": null,
"key4": "value",
"key5": 101
}

@ -1,8 +0,0 @@
{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ['list value']
}

@ -1,6 +0,0 @@
{
"key": "value",
"key-n": 101,
"key-o": {},
"key-l": []
}

@ -1,8 +0,0 @@
{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ["list value"]
}
Loading…
Cancel
Save