// Package lexer defines the structure and methods for lexical analysis of JSON. package lexer import ( "gitea.paas.celticinfo.fr/oabrivard/gojson/token" ) // Lexer struct represents a lexical analyzer with its input, current position, // next reading position, and current character. type Lexer struct { input string // the string being scanned position int // current position in the input (points to current char) readPosition int // current reading position in the input (after current char) ch byte // current char under examination } // NewLexer creates and initializes a new Lexer with the given input string. func NewLexer(input string) *Lexer { l := &Lexer{input: input} l.readChar() // Initialize the first character return l } // NextToken reads the next token from the input and returns it. func (l *Lexer) NextToken() token.Token { var tok token.Token l.skipWhitespace() // Skip any whitespace before the next token // Switch on the current character to determine the token type switch l.ch { case '{': tok = token.NewToken(token.BEGIN_OBJECT, l.ch) case '}': tok = token.NewToken(token.END_OBJECT, l.ch) case '[': tok = token.NewToken(token.BEGIN_ARRAY, l.ch) case ']': tok = token.NewToken(token.END_ARRAY, l.ch) case ':': tok = token.NewToken(token.NAME_SEPARATOR, l.ch) case ',': tok = token.NewToken(token.VALUE_SEPARATOR, l.ch) case '"': tok.Type = token.STRING tok.Value = l.readString() // Read a string token case 0: tok.Value = "" tok.Type = token.EOF // End of file/input default: // Handle numbers and identifiers or mark as illegal if isDigit(l.ch) || l.ch == '-' { tok.Value = l.readNumber() tok.Type = token.NUMBER return tok } else if isLetter(l.ch) { tok.Value = l.readIdentifier() tok.Type = token.LookupIdent(tok.Value) return tok } else { tok = token.NewToken(token.ILLEGAL, l.ch) } } l.readChar() // Move to the next character return tok } // readChar advances to the next character in the input. func (l *Lexer) readChar() { if l.readPosition >= len(l.input) { l.ch = 0 // End of input } else { l.ch = l.input[l.readPosition] } l.position = l.readPosition l.readPosition++ } // skipWhitespace skips over any whitespace characters in the input. func (l *Lexer) skipWhitespace() { for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { l.readChar() } } // readNumber reads a number (integer or floating point) from the input. func (l *Lexer) readNumber() string { position := l.position for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' { l.readChar() } return l.input[position:l.position] } // isDigit checks if a character is a digit. func isDigit(ch byte) bool { return '0' <= ch && ch <= '9' } // readString reads a string from the input, handling escaped quotes. func (l *Lexer) readString() string { position := l.position + 1 for { l.readChar() if l.ch == '"' || l.ch == 0 { break } } return l.input[position:l.position] } // readIdentifier reads an identifier from the input. func (l *Lexer) readIdentifier() string { position := l.position for isLetter(l.ch) { l.readChar() } return l.input[position:l.position] } // isLetter checks if a character is a letter or underscore. func isLetter(ch byte) bool { return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_' }