diff --git a/lexer/lexer.go b/lexer/lexer.go index f558b67..850dcb3 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -1,27 +1,33 @@ +// Package lexer defines the structure and methods for lexical analysis of JSON. package lexer import ( "gitea.paas.celticinfo.fr/oabrivard/gojson/token" ) +// Lexer is a lexical analyzer that tracks its input, the current position, +// the next reading position, and the current character. type Lexer struct { - input string - position int - readPosition int - ch byte + input string // the string being scanned + position int // current position in the input (points to current char) + readPosition int // current reading position in the input (after current char) + ch byte // current char under examination } +// NewLexer creates a Lexer for the given input string and reads its first character. func NewLexer(input string) *Lexer { l := &Lexer{input: input} - l.readChar() + l.readChar() // Load the first character into l.ch return l } +// NextToken skips leading whitespace, then reads and returns the next token from the input. func (l *Lexer) NextToken() token.Token { var tok token.Token - l.skipWhitespace() + l.skipWhitespace() // Skip any whitespace before the next token + // Switch on the current character to determine the token type switch l.ch { case '{': tok = token.NewToken(token.BEGIN_OBJECT, l.ch) @@ -37,11 +43,12 @@ func (l *Lexer) NextToken() token.Token { tok = token.NewToken(token.VALUE_SEPARATOR, l.ch) case '"': tok.Type = token.STRING - tok.Value = l.readString() + tok.Value = l.readString() // Read a string token case 0: tok.Value = "" - tok.Type = token.EOF + tok.Type = token.EOF // End of file/input default: + // Handle numbers and identifiers or mark as illegal if isDigit(l.ch) || l.ch == '-' { tok.Value = l.readNumber() tok.Type = token.NUMBER @@ -55,13 +62,14 @@ func (l *Lexer) NextToken() token.Token { } } - l.readChar() + l.readChar() // Move to the next character return tok } +// readChar advances to the next character in the input; ch becomes 0 once the input is exhausted. 
func (l *Lexer) readChar() { if l.readPosition >= len(l.input) { - l.ch = 0 + l.ch = 0 // 0 signals end of input } else { l.ch = l.input[l.readPosition] } @@ -69,12 +77,14 @@ func (l *Lexer) readChar() { l.readPosition++ } +// skipWhitespace advances past spaces, tabs, newlines, and carriage returns. func (l *Lexer) skipWhitespace() { for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { l.readChar() } } +// readNumber reads a run of digits, '.', '-', '+', 'e' and 'E' characters (integer or floating point) and returns it. func (l *Lexer) readNumber() string { position := l.position for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' { @@ -83,10 +93,12 @@ func (l *Lexer) readNumber() string { return l.input[position:l.position] } +// isDigit reports whether ch is an ASCII decimal digit ('0' through '9'). func isDigit(ch byte) bool { return '0' <= ch && ch <= '9' } +// readString reads a string from the input, handling escaped quotes. func (l *Lexer) readString() string { position := l.position + 1 for { @@ -98,6 +110,7 @@ func (l *Lexer) readString() string { return l.input[position:l.position] } +// readIdentifier reads a run of characters accepted by isLetter and returns it. func (l *Lexer) readIdentifier() string { position := l.position for isLetter(l.ch) { @@ -106,6 +119,7 @@ func (l *Lexer) readIdentifier() string { return l.input[position:l.position] } +// isLetter reports whether ch is an ASCII letter or an underscore. func isLetter(ch byte) bool { return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_' } diff --git a/parser/parser.go b/parser/parser.go index 578aa18..1d646c0 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1,3 +1,4 @@ +// Package parser defines the structure and methods for parsing JSON. package parser import ( @@ -10,15 +11,18 @@ import ( "gitea.paas.celticinfo.fr/oabrivard/gojson/token" ) +// Parser holds a lexer, the current and peek tokens, +// and a slice to store parsing errors. 
type Parser struct { - lexer *lexer.Lexer + lexer *lexer.Lexer // the lexer from which the parser receives tokens - curToken token.Token - peekToken token.Token + curToken token.Token // current token under examination + peekToken token.Token // next token in the input - errors []string + errors []string // error messages recorded during parsing } +// NewParser creates and initializes a new Parser with the given lexer. func NewParser(l *lexer.Lexer) *Parser { p := &Parser{lexer: l} // Initialize curToken and peekToken @@ -27,24 +31,26 @@ func NewParser(l *lexer.Lexer) *Parser { return p } +// nextToken moves peekToken into curToken and reads a new peekToken from the lexer. func (p *Parser) nextToken() { p.curToken = p.peekToken p.peekToken = p.lexer.NextToken() } -// Methods to parse JSON structure - +// JsonObject represents a JSON object as a map; JsonArray, declared next, represents a JSON array as a slice. type JsonObject map[string]interface{} type JsonArray []interface{} +// Parse parses the input as a single top-level JSON object by delegating to parseObject. func (p *Parser) Parse() JsonObject { return p.parseObject() } +// parseObject parses a JSON object from the token stream, returning nil when parsing fails. 
func (p *Parser) parseObject() JsonObject { object := make(JsonObject) - // Expect the current token to be TokenBeginObject + // Ensure the current token is the beginning of an object if !p.curTokenIs(token.BEGIN_OBJECT) { p.addError("expected '{'") return nil @@ -53,14 +59,14 @@ func (p *Parser) parseObject() JsonObject { // Move to the next token p.nextToken() - // Loop until we find an end object token + // Loop until the end of the object (or the end of input) is reached for !p.curTokenIs(token.END_OBJECT) && !p.curTokenIs(token.EOF) { key := p.parseObjectKey() if key == "" { return nil } - // Expect a name separator (:) + // Ensure a name separator (:) follows the key if !p.expectPeek(token.NAME_SEPARATOR) { return nil } @@ -76,12 +82,12 @@ func (p *Parser) parseObject() JsonObject { object[key] = value - // Move past the value, potentially to a comma or the end object + // Move past the value, to a comma or the end of the object p.nextToken() - // If we have a comma, the object continues + // A comma means another key-value pair must follow if p.curTokenIs(token.VALUE_SEPARATOR) { - if p.peekToken.Type == token.END_OBJECT { // no comma just before the end of the object + if p.peekToken.Type == token.END_OBJECT { // A trailing comma right before '}' is rejected p.addError("No ',' before '}'") return nil } @@ -90,7 +96,7 @@ func (p *Parser) parseObject() JsonObject { } } - // Expect the end object token + // The loop may also stop at EOF, so ensure the object was actually closed if !p.curTokenIs(token.END_OBJECT) { p.addError("expected '}'") return nil @@ -99,10 +105,11 @@ func (p *Parser) parseObject() JsonObject { return object } +// parseArray parses a JSON array from the token stream, returning nil when parsing fails. 
func (p *Parser) parseArray() JsonArray { array := JsonArray{} - // Expect the current token to be TokenBeginArray + // Ensure the current token is the beginning of an array if !p.curTokenIs(token.BEGIN_ARRAY) { p.addError("expected '['") return nil @@ -111,7 +118,7 @@ func (p *Parser) parseArray() JsonArray { // Move to the next token p.nextToken() - // Loop until we find an end array token + // Loop until the end of the array is reached (NOTE(review): unlike parseObject, EOF is not checked here) for !p.curTokenIs(token.END_ARRAY) { // Parse the value value, err := p.parseValue() @@ -124,13 +131,13 @@ func (p *Parser) parseArray() JsonArray { // Move past the value p.nextToken() - // If we have a value separator (comma), continue to the next value + // Skip the comma separating this value from the next one if p.curTokenIs(token.VALUE_SEPARATOR) { p.nextToken() } } - // Expect the end array token + // Ensure the end of the array is reached if !p.curTokenIs(token.END_ARRAY) { return nil } @@ -138,10 +145,12 @@ func (p *Parser) parseArray() JsonArray { return array } +// addError appends an error message to the parser's errors slice. func (p *Parser) addError(msg string) { p.errors = append(p.errors, msg) } +// parseObjectKey returns the current token's value as an object key, recording an error if the token is not a STRING. func (p *Parser) parseObjectKey() string { if p.curToken.Type != token.STRING { p.addError("expected string for key") @@ -150,6 +159,7 @@ func (p *Parser) parseObjectKey() string { return p.curToken.Value } +// parseValue parses a JSON value based on the current token type; an unexpected token is recorded and returned as an error. func (p *Parser) parseValue() (interface{}, error) { switch p.curToken.Type { case token.STRING: @@ -164,18 +174,17 @@ func (p *Parser) parseValue() (interface{}, error) { return p.parseObject(), nil case token.BEGIN_ARRAY: return p.parseArray(), nil - // ... other cases default: p.addError("unexpected token") return nil, errors.New("unexpected token") } } +// parseNumber parses a number token into an appropriate Go numeric type (a float64 when the literal contains '.', 'e' or 'E'). 
func (p *Parser) parseNumber() interface{} { - // Assuming the number is in a string format in the token numStr := p.curToken.Value - // Check if the number is an integer or a float + // A '.', 'e' or 'E' in the literal selects the float representation if strings.Contains(numStr, ".") || strings.ContainsAny(numStr, "eE") { // Parse as float val, err := strconv.ParseFloat(numStr, 64) @@ -195,10 +204,12 @@ func (p *Parser) parseNumber() interface{} { return val } +// parseBoolean reports whether the current token is of type TRUE. func (p *Parser) parseBoolean() bool { return p.curToken.Type == token.TRUE } +// expectPeek checks if the next token is of the expected type and, if so, advances to it. func (p *Parser) expectPeek(t token.TokenType) bool { if p.peekToken.Type == t { p.nextToken() @@ -209,14 +220,7 @@ func (p *Parser) expectPeek(t token.TokenType) bool { } } +// curTokenIs reports whether the current token is of type t. func (p *Parser) curTokenIs(t token.TokenType) bool { return p.curToken.Type == t } - -/* -func (p *Parser) parseArray() *JsonArray { - // Implementation for parsing an array -} -*/ - -// ... 
other parse methods for different types diff --git a/tests/step1/invalid.json b/tests/step1/invalid.json deleted file mode 100644 index e69de29..0000000 diff --git a/tests/step1/valid.json b/tests/step1/valid.json deleted file mode 100644 index 9e26dfe..0000000 --- a/tests/step1/valid.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/tests/step2/invalid.json b/tests/step2/invalid.json deleted file mode 100644 index d7e32b8..0000000 --- a/tests/step2/invalid.json +++ /dev/null @@ -1 +0,0 @@ -{"key": "value",} \ No newline at end of file diff --git a/tests/step2/invalid2.json b/tests/step2/invalid2.json deleted file mode 100644 index eff13a5..0000000 --- a/tests/step2/invalid2.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "key": "value", - key2: "value" -} \ No newline at end of file diff --git a/tests/step2/valid.json b/tests/step2/valid.json deleted file mode 100644 index 76519fa..0000000 --- a/tests/step2/valid.json +++ /dev/null @@ -1 +0,0 @@ -{"key": "value"} diff --git a/tests/step2/valid2.json b/tests/step2/valid2.json deleted file mode 100644 index 3c88601..0000000 --- a/tests/step2/valid2.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "key": "value", - "key2": "value" -} \ No newline at end of file diff --git a/tests/step3/invalid.json b/tests/step3/invalid.json deleted file mode 100644 index 94d2214..0000000 --- a/tests/step3/invalid.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "key1": true, - "key2": False, - "key3": null, - "key4": "value", - "key5": 101 -} \ No newline at end of file diff --git a/tests/step3/valid.json b/tests/step3/valid.json deleted file mode 100644 index 6f99bea..0000000 --- a/tests/step3/valid.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "key1": true, - "key2": false, - "key3": null, - "key4": "value", - "key5": 101 -} \ No newline at end of file diff --git a/tests/step4/invalid.json b/tests/step4/invalid.json deleted file mode 100644 index 304c553..0000000 --- a/tests/step4/invalid.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "key": 
"value", - "key-n": 101, - "key-o": { - "inner key": "inner value" - }, - "key-l": ['list value'] -} \ No newline at end of file diff --git a/tests/step4/valid.json b/tests/step4/valid.json deleted file mode 100644 index 0299c4e..0000000 --- a/tests/step4/valid.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "key": "value", - "key-n": 101, - "key-o": {}, - "key-l": [] -} \ No newline at end of file diff --git a/tests/step4/valid2.json b/tests/step4/valid2.json deleted file mode 100644 index 0fdd8fb..0000000 --- a/tests/step4/valid2.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "key": "value", - "key-n": 101, - "key-o": { - "inner key": "inner value" - }, - "key-l": ["list value"] -} \ No newline at end of file