You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
126 lines
3.4 KiB
Go
126 lines
3.4 KiB
Go
// Package lexer defines the structure and methods for lexical analysis of JSON.
|
|
package lexer
|
|
|
|
import (
|
|
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
|
|
)
|
|
|
|
// Lexer struct represents a lexical analyzer with its input, current position,
// next reading position, and current character. It scans the input one byte
// at a time; a ch value of 0 (NUL) marks the end of the input.
type Lexer struct {
	input        string // the string being scanned
	position     int    // current position in the input (points to current char, i.e. the index of ch)
	readPosition int    // current reading position in the input (after current char; always position+1 once initialized)
	ch           byte   // current char under examination; 0 signals end of input
}
|
|
|
|
// NewLexer creates and initializes a new Lexer with the given input string.
|
|
func NewLexer(input string) *Lexer {
|
|
l := &Lexer{input: input}
|
|
l.readChar() // Initialize the first character
|
|
return l
|
|
}
|
|
|
|
// NextToken reads the next token from the input and returns it.
|
|
func (l *Lexer) NextToken() token.Token {
|
|
var tok token.Token
|
|
|
|
l.skipWhitespace() // Skip any whitespace before the next token
|
|
|
|
// Switch on the current character to determine the token type
|
|
switch l.ch {
|
|
case '{':
|
|
tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
|
|
case '}':
|
|
tok = token.NewToken(token.END_OBJECT, l.ch)
|
|
case '[':
|
|
tok = token.NewToken(token.BEGIN_ARRAY, l.ch)
|
|
case ']':
|
|
tok = token.NewToken(token.END_ARRAY, l.ch)
|
|
case ':':
|
|
tok = token.NewToken(token.NAME_SEPARATOR, l.ch)
|
|
case ',':
|
|
tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
|
|
case '"':
|
|
tok.Type = token.STRING
|
|
tok.Value = l.readString() // Read a string token
|
|
case 0:
|
|
tok.Value = ""
|
|
tok.Type = token.EOF // End of file/input
|
|
default:
|
|
// Handle numbers and identifiers or mark as illegal
|
|
if isDigit(l.ch) || l.ch == '-' {
|
|
tok.Value = l.readNumber()
|
|
tok.Type = token.NUMBER
|
|
return tok
|
|
} else if isLetter(l.ch) {
|
|
tok.Value = l.readIdentifier()
|
|
tok.Type = token.LookupIdent(tok.Value)
|
|
return tok
|
|
} else {
|
|
tok = token.NewToken(token.ILLEGAL, l.ch)
|
|
}
|
|
}
|
|
|
|
l.readChar() // Move to the next character
|
|
return tok
|
|
}
|
|
|
|
// readChar advances to the next character in the input.
|
|
func (l *Lexer) readChar() {
|
|
if l.readPosition >= len(l.input) {
|
|
l.ch = 0 // End of input
|
|
} else {
|
|
l.ch = l.input[l.readPosition]
|
|
}
|
|
l.position = l.readPosition
|
|
l.readPosition++
|
|
}
|
|
|
|
// skipWhitespace skips over any whitespace characters in the input.
|
|
func (l *Lexer) skipWhitespace() {
|
|
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
|
|
l.readChar()
|
|
}
|
|
}
|
|
|
|
// readNumber reads a number (integer or floating point) from the input.
|
|
func (l *Lexer) readNumber() string {
|
|
position := l.position
|
|
for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' {
|
|
l.readChar()
|
|
}
|
|
return l.input[position:l.position]
|
|
}
|
|
|
|
// isDigit reports whether ch is an ASCII decimal digit ('0'..'9').
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
|
|
|
|
// readString reads a string from the input, handling escaped quotes.
|
|
func (l *Lexer) readString() string {
|
|
position := l.position + 1
|
|
for {
|
|
l.readChar()
|
|
if l.ch == '"' || l.ch == 0 {
|
|
break
|
|
}
|
|
}
|
|
return l.input[position:l.position]
|
|
}
|
|
|
|
// readIdentifier reads an identifier from the input.
|
|
func (l *Lexer) readIdentifier() string {
|
|
position := l.position
|
|
for isLetter(l.ch) {
|
|
l.readChar()
|
|
}
|
|
return l.input[position:l.position]
|
|
}
|
|
|
|
// isLetter reports whether ch is an ASCII letter or an underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	case ch == '_':
		return true
	}
	return false
}
|