You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
3.4 KiB
Go

// Package lexer defines the structure and methods for lexical analysis of JSON.
package lexer
import (
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
)
// Lexer is a byte-oriented scanner over a JSON document held in memory.
// It keeps a one-byte lookahead (ch) plus the two indices needed to slice
// multi-byte lexemes back out of the input.
type Lexer struct {
	// input is the complete text being scanned.
	input string
	// position is the index in input that ch was read from.
	position int
	// readPosition is the index the next readChar call will read from.
	readPosition int
	// ch is the byte currently under examination; it is 0 once the input
	// has been exhausted.
	ch byte
}
// NewLexer returns a Lexer positioned on the first byte of input.
// For an empty input the lexer starts out at end of input (ch == 0).
func NewLexer(input string) *Lexer {
	lx := new(Lexer)
	lx.input = input
	// Prime the lookahead so ch, position and readPosition are valid
	// before the first call to NextToken.
	lx.readChar()
	return lx
}
// NextToken skips leading whitespace, scans one token starting at the
// current byte and returns it, leaving the lexer on the first byte after
// that token.
//
// At end of input it returns a token of type token.EOF with an empty value.
// A byte that starts no known token yields a token.ILLEGAL token.
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	var tok token.Token

	// Multi-byte lexemes first: readNumber and readIdentifier stop on the
	// first byte they do not consume, so no extra advance is needed.
	if isDigit(l.ch) || l.ch == '-' {
		tok.Value = l.readNumber()
		tok.Type = token.NUMBER
		return tok
	}
	if isLetter(l.ch) {
		tok.Value = l.readIdentifier()
		tok.Type = token.LookupIdent(tok.Value)
		return tok
	}

	// Everything below ends with the lexer still sitting on the last byte
	// of the token (or on the closing quote), hence the shared readChar.
	switch l.ch {
	case 0:
		tok.Value = ""
		tok.Type = token.EOF
	case '"':
		tok.Type = token.STRING
		tok.Value = l.readString()
	case '{':
		tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
	case '}':
		tok = token.NewToken(token.END_OBJECT, l.ch)
	case '[':
		tok = token.NewToken(token.BEGIN_ARRAY, l.ch)
	case ']':
		tok = token.NewToken(token.END_ARRAY, l.ch)
	case ':':
		tok = token.NewToken(token.NAME_SEPARATOR, l.ch)
	case ',':
		tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
	default:
		tok = token.NewToken(token.ILLEGAL, l.ch)
	}

	l.readChar()
	return tok
}
// readChar loads the byte at readPosition into ch and advances both indices
// by one. When readPosition is at or past the end of the input, ch is set to
// 0 as the end-of-input marker; the indices still advance.
func (l *Lexer) readChar() {
	next := l.readPosition

	l.ch = 0
	if next < len(l.input) {
		l.ch = l.input[next]
	}

	l.position = next
	l.readPosition = next + 1
}
// skipWhitespace advances the lexer until ch is no longer a space, tab,
// line feed or carriage return. It does nothing if ch is already
// something else.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
// readNumber consumes the longest run of bytes drawn from the digits and
// the characters '.', '-', '+', 'e' and 'E', starting at the current byte,
// and returns that run. The lexer is left on the first byte not consumed.
//
// No grammar is enforced here: a run such as "1.2.3" is returned as-is.
func (l *Lexer) readNumber() string {
	start := l.position
	for {
		switch l.ch {
		case '.', '-', '+', 'e', 'E':
			// Sign, fraction and exponent markers are accepted anywhere.
		default:
			if !isDigit(l.ch) {
				return l.input[start:l.position]
			}
		}
		l.readChar()
	}
}
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	// byte arithmetic is unsigned, so anything below '0' wraps around to a
	// large value and fails the comparison.
	return ch-'0' < 10
}
// readString scans a string literal and returns the raw text between its
// quotes. It must be called with the lexer on the opening '"'; on return the
// lexer is on the closing '"', or at end of input if the literal is
// unterminated (everything read so far is then returned).
//
// A backslash escapes the byte that follows it, so an escaped quote (\")
// does not end the literal. Escape sequences are not decoded: they appear in
// the returned text exactly as written in the input.
func (l *Lexer) readString() string {
	start := l.position + 1
	for {
		l.readChar()
		switch l.ch {
		case '"', 0:
			return l.input[start:l.position]
		case '\\':
			// Step over the escaped byte so that \" and \\ are not
			// mistaken for a terminator or the start of another escape.
			l.readChar()
			if l.ch == 0 {
				// Trailing backslash: stop now, because reading past
				// end of input would push position beyond len(input)
				// and make the slice below go out of range.
				return l.input[start:l.position]
			}
		}
	}
}
// readIdentifier consumes the run of letters and underscores that starts at
// the current byte and returns it. The lexer is left on the first byte that
// is not part of the run.
func (l *Lexer) readIdentifier() string {
	begin := l.position
	for {
		if !isLetter(l.ch) {
			return l.input[begin:l.position]
		}
		l.readChar()
	}
}
// isLetter reports whether ch is an ASCII letter (either case) or an
// underscore.
func isLetter(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}