Implement lexer and parser; all provided test cases pass

main
oabrivard 2 years ago
parent 53daa8fb22
commit 4c2af09c99

@ -0,0 +1,3 @@
module gitea.paas.celticinfo.fr/oabrivard/gojson
go 1.21.4

@ -0,0 +1,111 @@
package lexer
import (
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
)
// Lexer scans a JSON input string byte by byte. It does no Unicode
// decoding: multi-byte runes simply pass through inside string literals.
type Lexer struct {
	input        string
	position     int  // index of the byte currently held in ch
	readPosition int  // index of the next byte to read (position + 1)
	ch           byte // current byte under examination; 0 marks end of input
}
// NewLexer returns a Lexer over input, primed on its first byte so that
// NextToken can be called immediately.
func NewLexer(input string) *Lexer {
	lex := &Lexer{input: input}
	// Load the first character; otherwise ch would be the zero byte and the
	// first NextToken call would report EOF.
	lex.readChar()
	return lex
}
// NextToken skips insignificant whitespace and returns the next token.
// Single-character structural tokens are emitted directly and followed by
// one readChar; string/number/identifier tokens delegate to the read*
// helpers, which already leave the lexer past the literal (hence the early
// returns that skip the trailing readChar).
func (l *Lexer) NextToken() token.Token {
	var tok token.Token
	l.skipWhitespace()
	switch l.ch {
	case '{':
		tok = token.NewToken(token.BEGIN_OBJECT, l.ch)
	case '}':
		tok = token.NewToken(token.END_OBJECT, l.ch)
	case '[':
		tok = token.NewToken(token.BEGIN_ARRAY, l.ch)
	case ']':
		tok = token.NewToken(token.END_ARRAY, l.ch)
	case ':':
		tok = token.NewToken(token.NAME_SEPARATOR, l.ch)
	case ',':
		tok = token.NewToken(token.VALUE_SEPARATOR, l.ch)
	case '"':
		tok.Type = token.STRING
		tok.Value = l.readString() // value excludes the surrounding quotes
	case 0:
		// readChar sets ch to 0 at end of input.
		tok.Value = ""
		tok.Type = token.EOF
	default:
		if isDigit(l.ch) || l.ch == '-' {
			tok.Value = l.readNumber()
			tok.Type = token.NUMBER
			// readNumber already advanced past the literal.
			return tok
		} else if isLetter(l.ch) {
			tok.Value = l.readIdentifier()
			// true/false/null become keyword tokens; any other word is ILLEGAL.
			tok.Type = token.LookupIdent(tok.Value)
			// readIdentifier already advanced past the word.
			return tok
		} else {
			tok = token.NewToken(token.ILLEGAL, l.ch)
		}
	}
	// Advance past the single character (or closing quote) just consumed.
	l.readChar()
	return tok
}
// readChar advances the lexer by one byte. Past the end of the input,
// ch is set to 0 (NUL), which NextToken interprets as EOF.
func (l *Lexer) readChar() {
	l.ch = 0
	if l.readPosition < len(l.input) {
		l.ch = l.input[l.readPosition]
	}
	l.position = l.readPosition
	l.readPosition++
}
// skipWhitespace advances past the four JSON insignificant-whitespace
// bytes: space, tab, line feed and carriage return.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
func (l *Lexer) readNumber() string {
position := l.position
for isDigit(l.ch) || l.ch == '.' || l.ch == '-' || l.ch == '+' || l.ch == 'e' || l.ch == 'E' {
l.readChar()
}
return l.input[position:l.position]
}
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
// readString consumes a string literal and returns its raw contents,
// without the surrounding quotes. The lexer is left on the closing quote
// so NextToken's trailing readChar advances past it.
//
// A backslash escapes the following character, so an escaped quote (\")
// no longer terminates the string prematurely (the original stopped at
// the first '"' regardless). Escape sequences are returned verbatim, not
// decoded. An unterminated string (EOF before the closing quote) yields
// the remaining text with no error, since Lexer has no error channel.
func (l *Lexer) readString() string {
	position := l.position + 1 // skip the opening quote
	for {
		l.readChar()
		if l.ch == '\\' {
			// Skip the escaped character so \" is not treated as the end.
			l.readChar()
			if l.ch == 0 {
				break
			}
			continue
		}
		if l.ch == '"' || l.ch == 0 {
			break
		}
	}
	return l.input[position:l.position]
}
// readIdentifier consumes a run of letters/underscores (e.g. "true",
// "false", "null") and returns the raw text, leaving the lexer on the
// first non-letter byte.
func (l *Lexer) readIdentifier() string {
	start := l.position
	for isLetter(l.ch) {
		l.readChar()
	}
	return l.input[start:l.position]
}
// isLetter reports whether ch may appear in an identifier: an ASCII
// letter (either case) or an underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}

@ -0,0 +1,45 @@
package lexer
import (
"testing"
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
)
// TestTokenizeSimpleObject drives the lexer over a small object and checks
// the exact token stream, including a negative scientific-notation number
// and the trailing EOF token.
func TestTokenizeSimpleObject(t *testing.T) {
	input := `{"name": "John", "age": 30, "value": -3.5e+5}`
	want := []struct {
		typ     token.TokenType
		literal string
	}{
		{token.BEGIN_OBJECT, "{"},
		{token.STRING, "name"},
		{token.NAME_SEPARATOR, ":"},
		{token.STRING, "John"},
		{token.VALUE_SEPARATOR, ","},
		{token.STRING, "age"},
		{token.NAME_SEPARATOR, ":"},
		{token.NUMBER, "30"},
		{token.VALUE_SEPARATOR, ","},
		{token.STRING, "value"},
		{token.NAME_SEPARATOR, ":"},
		{token.NUMBER, "-3.5e+5"},
		{token.END_OBJECT, "}"},
		{token.EOF, ""},
	}
	lex := NewLexer(input)
	for i, w := range want {
		got := lex.NextToken()
		if got.Type != w.typ {
			t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", i, w.typ, got.Type)
		}
		if got.Value != w.literal {
			t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q", i, w.literal, got.Value)
		}
	}
}

@ -0,0 +1,222 @@
package parser
import (
"errors"
"fmt"
"strconv"
"strings"
"gitea.paas.celticinfo.fr/oabrivard/gojson/lexer"
"gitea.paas.celticinfo.fr/oabrivard/gojson/token"
)
// Parser consumes tokens from a Lexer with one token of lookahead and
// accumulates human-readable error messages instead of aborting.
type Parser struct {
	lexer     *lexer.Lexer
	curToken  token.Token // token currently being examined
	peekToken token.Token // one-token lookahead
	errors    []string    // messages recorded via addError; empty means success
}
// NewParser builds a Parser on top of l and primes its two-token window:
// the first nextToken fills peekToken, the second shifts it into curToken
// and fetches a fresh peekToken.
func NewParser(l *lexer.Lexer) *Parser {
	p := &Parser{lexer: l}
	// Initialize curToken and peekToken
	p.nextToken()
	p.nextToken()
	return p
}
// nextToken slides the lookahead window forward: peekToken becomes the
// current token and a new token is pulled from the lexer.
func (p *Parser) nextToken() {
	p.curToken = p.peekToken
	p.peekToken = p.lexer.NextToken()
}
// JsonObject and JsonArray are the Go representations of parsed JSON
// containers: objects become string-keyed maps, arrays become slices.
// Values are string, int64, float64, bool, nil, JsonObject or JsonArray.
type JsonObject map[string]interface{}
type JsonArray []interface{}
// Parse parses the whole input, which must be a single JSON object at the
// top level (bare arrays or scalars are not accepted). On failure it
// returns nil and the reasons are available in p.errors.
func (p *Parser) Parse() JsonObject {
	return p.parseObject()
}
// parseObject parses a JSON object starting at the current '{' token.
// On any error it records a message via addError and returns nil; on
// success the parser is left positioned on the matching '}'.
func (p *Parser) parseObject() JsonObject {
	object := make(JsonObject)
	// Expect the current token to be TokenBeginObject
	if !p.curTokenIs(token.BEGIN_OBJECT) {
		p.addError("expected '{'")
		return nil
	}
	// Move to the next token
	p.nextToken()
	// Loop until we find an end object token
	for !p.curTokenIs(token.END_OBJECT) && !p.curTokenIs(token.EOF) {
		key := p.parseObjectKey()
		// NOTE(review): "" signals an error here, so a legal empty-string
		// key ({"": ...}) is rejected as well — confirm this is intended.
		if key == "" {
			return nil
		}
		// Expect a name separator (:)
		if !p.expectPeek(token.NAME_SEPARATOR) {
			return nil
		}
		// Move to the value token
		p.nextToken()
		// Parse the value
		value, err := p.parseValue()
		if err != nil {
			return nil
		}
		object[key] = value
		// Move past the value, potentially to a comma or the end object
		p.nextToken()
		// If we have a comma, the object continues
		if p.curTokenIs(token.VALUE_SEPARATOR) {
			if p.peekToken.Type == token.END_OBJECT { // no comma just before the end of the object
				p.addError("No ',' before '}'")
				return nil
			}
			p.nextToken()
		}
		// NOTE(review): a missing comma between members ({"a":1 "b":2}) is
		// not detected — the loop silently continues with the next key.
	}
	// Expect the end object token
	if !p.curTokenIs(token.END_OBJECT) {
		p.addError("expected '}'")
		return nil
	}
	return object
}
// parseArray parses a JSON array starting at the current '[' token.
// On any error it records a message via addError and returns nil; on
// success the parser is left positioned on the matching ']'.
//
// Fixes relative to the original: the element loop now stops on EOF (an
// unterminated array previously depended on parseValue failing first and
// then returned nil with no message of its own), the final ']' check
// records "expected ']'" instead of failing silently, and a trailing
// comma ([1,]) is rejected, mirroring parseObject's check.
func (p *Parser) parseArray() JsonArray {
	array := JsonArray{}
	// Expect the current token to be TokenBeginArray
	if !p.curTokenIs(token.BEGIN_ARRAY) {
		p.addError("expected '['")
		return nil
	}
	// Move past '[' to the first element (or the closing ']').
	p.nextToken()
	// Loop until the end of the array; guard against EOF so an unterminated
	// array produces a clear error below.
	for !p.curTokenIs(token.END_ARRAY) && !p.curTokenIs(token.EOF) {
		// Parse the value
		value, err := p.parseValue()
		if err != nil {
			return nil
		}
		array = append(array, value)
		// Move past the value
		p.nextToken()
		// If we have a value separator (comma), continue to the next value
		if p.curTokenIs(token.VALUE_SEPARATOR) {
			if p.peekToken.Type == token.END_ARRAY { // no comma just before the end of the array
				p.addError("No ',' before ']'")
				return nil
			}
			p.nextToken()
		}
	}
	// Expect the end array token
	if !p.curTokenIs(token.END_ARRAY) {
		p.addError("expected ']'")
		return nil
	}
	return array
}
// addError records a parse-error message; parsing continues at the
// caller's discretion and all messages are reported together via p.errors.
func (p *Parser) addError(msg string) {
	p.errors = append(p.errors, msg)
}
// parseObjectKey returns the current token's value when it is a STRING,
// or "" after recording an error otherwise.
// NOTE(review): "" is also what a legal empty-string key would return, so
// the caller cannot distinguish {"": 1} from a malformed key.
func (p *Parser) parseObjectKey() string {
	if p.curToken.Type != token.STRING {
		p.addError("expected string for key")
		return ""
	}
	return p.curToken.Value
}
// parseValue converts the current token into a Go value: containers
// recurse into parseObject/parseArray, scalars map to string, int64/
// float64, bool or nil. A JSON null legitimately returns (nil, nil); the
// non-nil error distinguishes an unrecognized token from that case.
func (p *Parser) parseValue() (interface{}, error) {
	switch p.curToken.Type {
	case token.BEGIN_OBJECT:
		return p.parseObject(), nil
	case token.BEGIN_ARRAY:
		return p.parseArray(), nil
	case token.STRING:
		return p.curToken.Value, nil
	case token.NUMBER:
		return p.parseNumber(), nil
	case token.TRUE, token.FALSE:
		return p.parseBoolean(), nil
	case token.NULL:
		return nil, nil
	}
	p.addError("unexpected token")
	return nil, errors.New("unexpected token")
}
// parseNumber converts the current NUMBER token's literal text into an
// int64 when it looks integral, or a float64 when it contains '.', 'e'
// or 'E'. On a conversion failure an error is recorded and nil returned.
// NOTE(review): that nil flows back through parseValue as a "successful"
// null value, so e.g. int64 overflow is recorded but not fatal — confirm
// this is the intended contract.
func (p *Parser) parseNumber() interface{} {
	literal := p.curToken.Value
	if strings.ContainsAny(literal, ".eE") {
		// Float path: fractional part or exponent present.
		f, err := strconv.ParseFloat(literal, 64)
		if err != nil {
			p.addError(fmt.Sprintf("could not parse %q as float", literal))
			return nil
		}
		return f
	}
	// Integer path.
	i, err := strconv.ParseInt(literal, 10, 64)
	if err != nil {
		p.addError(fmt.Sprintf("could not parse %q as integer", literal))
		return nil
	}
	return i
}
// parseBoolean maps the current TRUE/FALSE token to its Go bool value.
// Any token type other than TRUE yields false.
func (p *Parser) parseBoolean() bool {
	return p.curToken.Type == token.TRUE
}
// expectPeek advances to the next token when the lookahead has the
// expected type and reports whether it did; on a mismatch it records an
// error and leaves the parser where it was.
func (p *Parser) expectPeek(t token.TokenType) bool {
	if p.peekToken.Type != t {
		p.addError(fmt.Sprintf("expected next token to be %v, got %v instead", t, p.peekToken.Type))
		return false
	}
	p.nextToken()
	return true
}
// curTokenIs reports whether the current token has type t.
func (p *Parser) curTokenIs(t token.TokenType) bool {
	return p.curToken.Type == t
}
/*
func (p *Parser) parseArray() *JsonArray {
// Implementation for parsing an array
}
*/
// ... other parse methods for different types

@ -0,0 +1,306 @@
package parser
import (
"reflect"
"testing"
"gitea.paas.celticinfo.fr/oabrivard/gojson/lexer"
)
// TestParseSimpleObject checks a flat object mixing a string, an integer
// and a boolean value.
func TestParseSimpleObject(t *testing.T) {
	input := `{"name": "John", "age": 30, "isStudent": false}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{
		"name":      "John",
		"age":       int64(30), // integral literals are parsed as int64
		"isStudent": false,
	}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
// TestParseStep1Valid ensures the minimal object {} parses to an empty map.
func TestParseStep1Valid(t *testing.T) {
	input := `{}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
func TestParseStep1Invalid(t *testing.T) {
input := ``
l := lexer.NewLexer(input)
p := NewParser(l)
parsed := p.Parse()
if len(p.errors) != 1 || p.errors[0] != "expected '{'" {
t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
}
if parsed != nil {
t.Errorf("expected a nil result from parsing an empty input")
}
}
// TestParseStep2Valid1 checks a single string-valued member.
func TestParseStep2Valid1(t *testing.T) {
	input := `{"key": "value"}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{
		"key": "value",
	}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
// TestParseStep2Valid2 checks a multi-line object with two members.
func TestParseStep2Valid2(t *testing.T) {
	input := `{
"key": "value",
"key2": "value"
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{
		"key":  "value",
		"key2": "value",
	}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
// TestParseStep2Invalid1 rejects a trailing comma before '}'.
func TestParseStep2Invalid1(t *testing.T) {
	input := `{"key": "value",}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 1 || p.errors[0] != "No ',' before '}'" {
		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
	}
	if parsed != nil {
		// Message fixed: this input is invalid, not empty.
		t.Errorf("expected a nil result from parsing invalid input")
	}
}
// TestParseStep2Invalid2 rejects an unquoted object key.
func TestParseStep2Invalid2(t *testing.T) {
	input := `{
"key": "value",
key2: "value"
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 1 || p.errors[0] != "expected string for key" {
		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
	}
	if parsed != nil {
		// Message fixed: this input is invalid, not empty.
		t.Errorf("expected a nil result from parsing invalid input")
	}
}
// TestParseStep3Valid checks all scalar value kinds: booleans, null,
// string and integer.
func TestParseStep3Valid(t *testing.T) {
	input := `{
"key1": true,
"key2": false,
"key3": null,
"key4": "value",
"key5": 101
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{
		"key1": true,
		"key2": false,
		"key3": nil,
		"key4": "value",
		"key5": int64(101), // integral literals are parsed as int64
	}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
// TestParseStep3Invalid rejects the miscapitalized keyword "False"
// (the lexer classifies unknown words as ILLEGAL).
func TestParseStep3Invalid(t *testing.T) {
	input := `{
"key1": true,
"key2": False,
"key3": null,
"key4": "value",
"key5": 101
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 1 || p.errors[0] != "unexpected token" {
		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
	}
	if parsed != nil {
		// Message fixed: this input is invalid, not empty.
		t.Errorf("expected a nil result from parsing invalid input")
	}
}
// TestParseStep4Valid1 checks empty nested containers ({} and []).
func TestParseStep4Valid1(t *testing.T) {
	input := `{
"key": "value",
"key-n": 101,
"key-o": {},
"key-l": []
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{
		"key":   "value",
		"key-n": int64(101),
		"key-o": JsonObject{},
		"key-l": JsonArray{},
	}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
// TestParseStep4Valid2 checks non-empty nested containers.
func TestParseStep4Valid2(t *testing.T) {
	input := `{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ["list value"]
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 0 {
		// Constant format string: t.Fatalf(errMsg) trips `go vet`'s printf check.
		t.Fatalf("unexpected parser errors: %v", p.errors)
	}
	expected := JsonObject{
		"key":   "value",
		"key-n": int64(101),
		"key-o": JsonObject{
			"inner key": "inner value",
		},
		"key-l": JsonArray{"list value"},
	}
	if !reflect.DeepEqual(parsed, expected) {
		t.Errorf("parsed object is not as expected. Got %+v, want %+v", parsed, expected)
	}
}
// TestParseStep4Invalid rejects single-quoted strings inside an array:
// the lexer emits ILLEGAL tokens, producing two parse errors.
func TestParseStep4Invalid(t *testing.T) {
	input := `{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ['list value']
}`
	l := lexer.NewLexer(input)
	p := NewParser(l)
	parsed := p.Parse()
	if len(p.errors) != 2 || p.errors[0] != "unexpected token" || p.errors[1] != "expected string for key" {
		t.Errorf("Not the expected error(s) during parsing, got %v", p.errors)
	}
	if parsed != nil {
		// Message fixed: this input is invalid, not empty.
		t.Errorf("expected a nil result from parsing invalid input")
	}
}

@ -0,0 +1 @@
{"key": "value",}

@ -0,0 +1,4 @@
{
"key": "value",
key2: "value"
}

@ -0,0 +1 @@
{"key": "value"}

@ -0,0 +1,4 @@
{
"key": "value",
"key2": "value"
}

@ -0,0 +1,7 @@
{
"key1": true,
"key2": False,
"key3": null,
"key4": "value",
"key5": 101
}

@ -0,0 +1,7 @@
{
"key1": true,
"key2": false,
"key3": null,
"key4": "value",
"key5": 101
}

@ -0,0 +1,8 @@
{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ['list value']
}

@ -0,0 +1,6 @@
{
"key": "value",
"key-n": 101,
"key-o": {},
"key-l": []
}

@ -0,0 +1,8 @@
{
"key": "value",
"key-n": 101,
"key-o": {
"inner key": "inner value"
},
"key-l": ["list value"]
}

@ -0,0 +1,49 @@
package token
// TokenType identifies the lexical category of a Token.
type TokenType int

const (
	// Special tokens. EOF is intentionally first (iota == 0) so that the
	// zero value of Token reads as end-of-input.
	EOF     TokenType = iota // Represents the end of the file/input
	ILLEGAL                  // Any character sequence that doesn't form a valid token in JSON

	// Symbols and structure tokens
	BEGIN_ARRAY     // [
	END_ARRAY       // ]
	BEGIN_OBJECT    // {
	END_OBJECT      // }
	NAME_SEPARATOR  // :
	VALUE_SEPARATOR // ,

	// Whitespace (spaces, tabs, line feeds, carriage returns)
	WHITESPACE

	// Literal types
	STRING // Represents a string literal
	NUMBER // Represents a number
	TRUE   // Represents the boolean value "true"
	FALSE  // Represents the boolean value "false"
	NULL   // Represents the "null" value
)

// tokenTypeNames maps TokenType values to readable names for diagnostics.
// It must be kept in sync with the const block above.
var tokenTypeNames = [...]string{
	EOF:             "EOF",
	ILLEGAL:         "ILLEGAL",
	BEGIN_ARRAY:     "BEGIN_ARRAY",
	END_ARRAY:       "END_ARRAY",
	BEGIN_OBJECT:    "BEGIN_OBJECT",
	END_OBJECT:      "END_OBJECT",
	NAME_SEPARATOR:  "NAME_SEPARATOR",
	VALUE_SEPARATOR: "VALUE_SEPARATOR",
	WHITESPACE:      "WHITESPACE",
	STRING:          "STRING",
	NUMBER:          "NUMBER",
	TRUE:            "TRUE",
	FALSE:           "FALSE",
	NULL:            "NULL",
}

// String implements fmt.Stringer so diagnostics (e.g. the parser's
// "expected next token to be %v" message) print names instead of bare ints.
func (t TokenType) String() string {
	if t >= 0 && int(t) < len(tokenTypeNames) {
		return tokenTypeNames[t]
	}
	return "UNKNOWN"
}

// Token is a single lexical unit: its category plus the literal text
// (for STRING tokens, the unquoted contents).
type Token struct {
	Type  TokenType
	Value string
}

// NewToken builds a Token from a single-character lexeme.
func NewToken(tokenType TokenType, ch byte) Token {
	return Token{Type: tokenType, Value: string(ch)}
}

// keywords maps the three JSON literal words to their token types.
var keywords = map[string]TokenType{
	"true":  TRUE,
	"false": FALSE,
	"null":  NULL,
}

// LookupIdent classifies a bare word scanned by the lexer: one of the JSON
// keywords, or ILLEGAL for anything else (JSON has no other bare words).
func LookupIdent(ident string) TokenType {
	if tok, ok := keywords[ident]; ok {
		return tok
	}
	return ILLEGAL
}
Loading…
Cancel
Save