Add Scanner

main
oabrivard 1 year ago
parent 5063cdbf40
commit d8050c1699

@ -0,0 +1,2 @@
#!/bin/zsh
# Build the Go package in the current directory and write the binary to bin/golox.
go build -o bin/golox

@ -6,13 +6,20 @@ import (
"os" "os"
) )
// hadError is the package-level error flag. RunFile exits with status 65 when
// it is set after running a script, and RunPrompt clears it after each line.
var hadError = false
func RunFile(path string) { func RunFile(path string) {
bytes, err := os.ReadFile(path) bytes, err := os.ReadFile(path)
if err != nil { if err != nil {
fmt.Println("Error reading file", path) fmt.Println("Error reading file", path)
os.Exit(74) os.Exit(74)
} }
run(string(bytes)) run(string(bytes))
if hadError {
os.Exit(65)
}
} }
func RunPrompt() { func RunPrompt() {
@ -32,9 +39,18 @@ func RunPrompt() {
} }
run(line) run(line)
hadError = false
} }
} }
// Error reports message as an error on the given source line. It delegates to
// report with an empty location.
func Error(line int, message string) {
	const where = "" // no location within the line is known here
	report(line, where, message)
}
func report(line int, where string, message string) {
fmt.Printf("[line %d] Error %s: %s\n", line, where, message)
}
// run processes a chunk of Lox source. At this stage it only echoes the source
// to standard output, followed by a newline.
func run(source string) {
	fmt.Fprintln(os.Stdout, source)
}

@ -0,0 +1,180 @@
package lox
import (
"bytes"
"fmt"
"io"
"os"
"testing"
)
// TestRun checks that run echoes the source it receives to standard output,
// followed by a newline.
func TestRun(t *testing.T) {
	r, w, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}

	// Redirect stdout into the pipe. The deferred function guarantees that the
	// real stdout is restored and both pipe ends are released even if run
	// panics or the test aborts early.
	old := os.Stdout
	os.Stdout = w
	defer func() {
		os.Stdout = old
		w.Close() // already closed on the normal path; the error is irrelevant
		r.Close()
	}()

	// Drain the pipe in a separate goroutine so printing can't block
	// indefinitely. The channel is buffered so the goroutine can always exit.
	outC := make(chan string, 1)
	go func() {
		var buf bytes.Buffer
		io.Copy(&buf, r)
		outC <- buf.String()
	}()

	source := "print('Hello, World!');"
	run(source)

	// Closing the write end delivers EOF to the reader goroutine.
	w.Close()
	os.Stdout = old
	out := <-outC

	expected := source + "\n"
	if out != expected {
		t.Errorf("run() = %v; want %v", out, expected)
	}
}
func TestRunFile(t *testing.T) {
old := os.Stdout // keep backup of the real stdout
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
os.Stdout = w
outC := make(chan string)
// copy the output in a separate goroutine so printing can't block indefinitely
go func() {
var buf bytes.Buffer
io.Copy(&buf, r)
outC <- buf.String()
}()
// Create a temporary file with some content
tmpfile, err := os.CreateTemp("", "example.*.txt")
if err != nil {
t.Fatal(err)
}
defer os.Remove(tmpfile.Name())
content := "print('Hello, World!');"
if _, err := tmpfile.Write([]byte(content)); err != nil {
t.Fatal(err)
}
if err := tmpfile.Close(); err != nil {
t.Fatal(err)
}
RunFile(tmpfile.Name())
// back to normal state
w.Close()
os.Stdout = old // restoring the real stdout
out := <-outC
// reading our temp stdout
expected := "print('Hello, World!');\n"
if out != expected {
t.Errorf("RunFile() = %v; want %v", out, expected)
}
}
func TestError(t *testing.T) {
old := os.Stdout // keep backup of the real stdout
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
os.Stdout = w
outC := make(chan string)
// copy the output in a separate goroutine so printing can't block indefinitely
go func() {
var buf bytes.Buffer
io.Copy(&buf, r)
outC <- buf.String()
}()
line := 1
message := "Unexpected character."
Error(line, message)
// back to normal state
w.Close()
os.Stdout = old // restoring the real stdout
out := <-outC
// reading our temp stdout
expected := fmt.Sprintf("[line %d] Error : %s\n", line, message)
if out != expected {
t.Errorf("Error() = %v; want %v", out, expected)
}
}
func TestReport(t *testing.T) {
old := os.Stdout // keep backup of the real stdout
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
os.Stdout = w
outC := make(chan string)
// copy the output in a separate goroutine so printing can't block indefinitely
go func() {
var buf bytes.Buffer
io.Copy(&buf, r)
outC <- buf.String()
}()
line := 1
where := "at 'foo'"
message := "Unexpected character."
report(line, where, message)
// back to normal state
w.Close()
os.Stdout = old // restoring the real stdout
out := <-outC
// reading our temp stdout
expected := fmt.Sprintf("[line %d] Error %s: %s\n", line, where, message)
if out != expected {
t.Errorf("report() = %v; want %v", out, expected)
}
}
// TestRunPrompt feeds a scripted session to RunPrompt through a pipe installed
// as stdin and checks the transcript written to stdout.
func TestRunPrompt(t *testing.T) {
	rIn, wIn, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	rOut, wOut, err := os.Pipe()
	if err != nil {
		rIn.Close()
		wIn.Close()
		t.Fatal(err)
	}

	// Swap both standard streams and make sure they are always restored and
	// every pipe end is released, even if RunPrompt panics or the test aborts.
	oldStdin, oldStdout := os.Stdin, os.Stdout
	os.Stdin, os.Stdout = rIn, wOut
	defer func() {
		os.Stdin, os.Stdout = oldStdin, oldStdout
		wIn.Close() // already closed on the normal path; the error is irrelevant
		rIn.Close()
		wOut.Close()
		rOut.Close()
	}()

	// Drain stdout concurrently so printing can't block. The channel is
	// buffered so the goroutine can always exit.
	outC := make(chan string, 1)
	go func() {
		var buf bytes.Buffer
		io.Copy(&buf, rOut)
		outC <- buf.String()
	}()

	// One statement followed by an empty line. The input is small enough to
	// sit in the pipe buffer until RunPrompt reads it.
	input := "print('Hello, World!');\n\n"
	if _, err := wIn.Write([]byte(input)); err != nil {
		t.Fatal(err)
	}
	if err := wIn.Close(); err != nil {
		t.Fatal(err)
	}

	RunPrompt()

	// Closing the write end delivers EOF to the reader goroutine.
	wOut.Close()
	os.Stdin, os.Stdout = oldStdin, oldStdout
	out := <-outC

	// NOTE(review): presumably a prompt, the echoed statement, and a second
	// prompt shown before the empty line ends the session; confirm against
	// RunPrompt.
	expected := "> print('Hello, World!');\n\n> "
	if out != expected {
		t.Errorf("RunPrompt() = %v; want %v", out, expected)
	}
}

@ -0,0 +1,244 @@
package scanner
import (
"golox/lox"
"golox/token"
"strconv"
)
// Scanner turns Lox source text into a list of tokens.
//
// The lexeme currently being scanned is the half-open byte range
// source[start:current]; line is the line number attached to tokens and to
// reported errors.
type Scanner struct {
// source is the source code being scanned.
source string
// start is the index of the first byte of the lexeme being scanned.
start int
// current is the index of the next byte to be consumed.
current int
// line is the current line number; New initialises it to 1.
line int
// tokens holds the tokens scanned so far.
tokens []token.Token
}
// New returns a Scanner positioned at the beginning of source, on line 1, with
// an empty (non-nil) token list.
func New(source string) *Scanner {
	s := new(Scanner)
	s.source = source
	s.line = 1 // start and current keep their zero value
	s.tokens = []token.Token{}
	return s
}
// ScanTokens scans the whole source and returns every token found, followed by
// a final EOF token that carries the current line number.
func (s *Scanner) ScanTokens() []token.Token {
	for {
		if s.isAtEnd() {
			break
		}
		// Every iteration begins a new lexeme at the current position.
		s.start = s.current
		s.scanToken()
	}

	eof := token.New(token.EOF, "", nil, s.line)
	s.tokens = append(s.tokens, eof)
	return s.tokens
}
// isAtEnd reports whether every byte of the source has been consumed.
func (s *Scanner) isAtEnd() bool {
	remaining := len(s.source) - s.current
	return remaining <= 0
}
// scanToken consumes one lexeme starting at the current position and appends
// the matching token, if any. Whitespace and "//" comments produce no token, a
// newline only increments the line counter, and any other unrecognised
// character is reported through lox.Error.
func (s *Scanner) scanToken() {
	// pick adds withEqual when the next character is '=' (consuming it) and
	// alone otherwise.
	pick := func(alone, withEqual token.TokenType) {
		if s.match('=') {
			s.addToken(withEqual)
			return
		}
		s.addToken(alone)
	}

	switch ch := s.advance(); ch {
	// Single-character tokens.
	case '(':
		s.addToken(token.LEFT_PAREN)
	case ')':
		s.addToken(token.RIGHT_PAREN)
	case '{':
		s.addToken(token.LEFT_BRACE)
	case '}':
		s.addToken(token.RIGHT_BRACE)
	case ',':
		s.addToken(token.COMMA)
	case '.':
		s.addToken(token.DOT)
	case '-':
		s.addToken(token.MINUS)
	case '+':
		s.addToken(token.PLUS)
	case ';':
		s.addToken(token.SEMICOLON)
	case '*':
		s.addToken(token.STAR)

	// Operators that may be followed by '='.
	case '!':
		pick(token.BANG, token.BANG_EQUAL)
	case '=':
		pick(token.EQUAL, token.EQUAL_EQUAL)
	case '<':
		pick(token.LESS, token.LESS_EQUAL)
	case '>':
		pick(token.GREATER, token.GREATER_EQUAL)

	case '/':
		if !s.match('/') {
			s.addToken(token.SLASH)
			break
		}
		// A comment goes until the end of the line; the newline itself is
		// left for the next call so the line counter is still updated.
		for !s.isAtEnd() && s.peek() != '\n' {
			s.advance()
		}

	case ' ', '\r', '\t':
		// Ignore whitespace.
	case '\n':
		s.line++
	case '"':
		s.string()

	default:
		switch {
		case isDigit(ch):
			s.number()
		case isAlpha(ch):
			s.identifier()
		default:
			lox.Error(s.line, "Unexpected character.")
		}
	}
}
// identifier consumes the remaining letters, digits and underscores of the
// current lexeme and adds the token type token.LookupKeyword returns for it.
func (s *Scanner) identifier() {
	for {
		next := s.peek()
		if !isAlpha(next) && !isDigit(next) {
			break
		}
		s.advance()
	}

	// The lexeme spans from the start of the token to the current position.
	s.addToken(token.LookupKeyword(s.source[s.start:s.current]))
}
// number consumes a numeric literal (digits with an optional fractional part)
// and adds a NUMBER token whose literal is the parsed float64. A parse failure
// is reported through lox.Error and no token is added.
func (s *Scanner) number() {
	skipDigits := func() {
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	skipDigits()

	// A '.' only belongs to the number when a digit follows it.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // consume the "."
		skipDigits()
	}

	lexeme := s.source[s.start:s.current]
	value, err := strconv.ParseFloat(lexeme, 64)
	if err != nil {
		lox.Error(s.line, "Could not parse number.")
		return
	}
	s.addTokenLiteral(token.NUMBER, value)
}
// string consumes a string literal up to and including its closing quote and
// adds a STRING token whose literal is the text between the quotes. Newlines
// inside the literal increment the line counter. Reaching the end of the
// source first is reported through lox.Error and no token is added.
func (s *Scanner) string() {
	for !s.isAtEnd() {
		ch := s.peek()
		if ch == '"' {
			break
		}
		if ch == '\n' {
			s.line++
		}
		s.advance()
	}

	if s.isAtEnd() {
		lox.Error(s.line, "Unterminated string.")
		return
	}

	s.advance() // the closing quote

	// Drop the surrounding quotes from the lexeme.
	contents := s.source[s.start+1 : s.current-1]
	s.addTokenLiteral(token.STRING, contents)
}
// match consumes the current character and returns true when it equals
// expected. At the end of the source, or on any other character, it returns
// false and leaves the scanner untouched.
func (s *Scanner) match(expected byte) bool {
	if s.isAtEnd() || s.source[s.current] != expected {
		return false
	}
	s.current++
	return true
}
// peek returns the character at the current position without consuming it, or
// the NUL byte when the end of the source has been reached.
func (s *Scanner) peek() byte {
	const nul = '\000'
	if !s.isAtEnd() {
		return s.source[s.current]
	}
	return nul
}
// peekNext returns the character one position past the current one without
// consuming anything, or the NUL byte when that position is outside the source.
func (s *Scanner) peekNext() byte {
	if next := s.current + 1; next < len(s.source) {
		return s.source[next]
	}
	return '\000'
}
// isAlpha reports whether c is an ASCII letter or an underscore.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
// advance consumes the character at the current position and returns it,
// leaving the scanner positioned on the following character.
func (s *Scanner) advance() byte {
	pos := s.current
	ch := s.source[pos] // read first, so current only moves if the read succeeds
	s.current = pos + 1
	return ch
}
// addToken appends a token of type t that carries no literal value.
func (s *Scanner) addToken(t token.TokenType) {
	var noLiteral interface{} // nil: this token has no literal
	s.addTokenLiteral(t, noLiteral)
}
// addTokenLiteral appends a token of type t with the given literal value. The
// token's lexeme is the half-open range source[start:current] and its line is
// the scanner's current line.
func (s *Scanner) addTokenLiteral(t token.TokenType, literal interface{}) {
	lexeme := s.source[s.start:s.current]
	tok := token.New(t, lexeme, literal, s.line)
	s.tokens = append(s.tokens, tok)
}

@ -0,0 +1,381 @@
package scanner
import (
"golox/token"
"testing"
)
// TestScanTokens runs ScanTokens over several sources and compares the token
// types it produces, always expecting a trailing EOF token.
func TestScanTokens(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   []token.TokenType
	}{
		{
			name:   "Single character tokens",
			source: "(){}.,-+;*",
			want: []token.TokenType{
				token.LEFT_PAREN, token.RIGHT_PAREN, token.LEFT_BRACE, token.RIGHT_BRACE,
				token.DOT, token.COMMA, token.MINUS, token.PLUS, token.SEMICOLON, token.STAR,
			},
		},
		{
			name:   "Operators",
			source: "! != = == < <= > >=",
			want: []token.TokenType{
				token.BANG, token.BANG_EQUAL, token.EQUAL, token.EQUAL_EQUAL,
				token.LESS, token.LESS_EQUAL, token.GREATER, token.GREATER_EQUAL,
			},
		},
		{
			name:   "Comments",
			source: "// this is a comment\n+",
			want:   []token.TokenType{token.PLUS},
		},
		{
			name:   "Whitespace",
			source: " \r\t\n",
			want:   []token.TokenType{},
		},
		{
			name:   "String literals",
			source: `"hello world"`,
			want:   []token.TokenType{token.STRING},
		},
		{
			name:   "Number literals",
			source: "123 45.67",
			want:   []token.TokenType{token.NUMBER, token.NUMBER},
		},
		{
			name:   "Identifiers and keywords",
			source: "and class else false for fun if nil or print return super this true var while",
			want: []token.TokenType{
				token.AND, token.CLASS, token.ELSE, token.FALSE, token.FOR, token.FUN, token.IF,
				token.NIL, token.OR, token.PRINT, token.RETURN, token.SUPER, token.THIS, token.TRUE,
				token.VAR, token.WHILE,
			},
		},
		{
			// Scanning errors produce no token; only EOF remains.
			name:   "Unterminated string",
			source: `"unterminated string`,
			want:   []token.TokenType{},
		},
		{
			name:   "Unexpected character",
			source: "@",
			want:   []token.TokenType{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.source).ScanTokens()

			wantLen := len(tc.want) + 1 // +1 for EOF token
			if len(got) != wantLen {
				t.Fatalf("expected %d tokens, got %d", wantLen, len(got))
			}

			for i := range tc.want {
				if got[i].Type != tc.want[i] {
					t.Errorf("expected token %v, got %v", tc.want[i], got[i].Type)
				}
			}

			if last := got[len(got)-1]; last.Type != token.EOF {
				t.Errorf("expected EOF token, got %v", last.Type)
			}
		})
	}
}
// TestIsAtEnd checks isAtEnd on a freshly created scanner for a non-empty and
// an empty source.
func TestIsAtEnd(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   bool
	}{
		{name: "Not at end", source: "abc", want: false},
		{name: "At end", source: "", want: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.source).isAtEnd()
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestMatch checks the result of match on the first character of the source.
func TestMatch(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   bool
		char   byte
	}{
		{name: "Match character", source: "=", want: true, char: '='},
		{name: "No match character", source: "!", want: false, char: '='},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.source).match(tc.char)
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestPeek checks that peek returns the first character of the source, or the
// NUL byte for an empty source.
func TestPeek(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   byte
	}{
		{name: "Peek character", source: "abc", want: 'a'},
		{name: "Peek at end", source: "", want: '\000'},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.source).peek()
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestPeekNext checks that peekNext returns the second character of the
// source, or the NUL byte when there is none.
func TestPeekNext(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   byte
	}{
		{name: "Peek next character", source: "abc", want: 'b'},
		{name: "Peek next at end", source: "a", want: '\000'},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.source).peekNext()
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestAdvance checks that the first call to advance returns the first
// character of the source.
func TestAdvance(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   byte
	}{
		{name: "Advance character", source: "abc", want: 'a'},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := New(tc.source).advance()
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestIsAlpha checks isAlpha against one letter and one digit.
func TestIsAlpha(t *testing.T) {
	cases := []struct {
		name string
		char byte
		want bool
	}{
		{name: "Is alpha", char: 'a', want: true},
		{name: "Is not alpha", char: '1', want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := isAlpha(tc.char)
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestIsDigit checks isDigit against one digit and one letter.
func TestIsDigit(t *testing.T) {
	cases := []struct {
		name string
		char byte
		want bool
	}{
		{name: "Is digit", char: '1', want: true},
		{name: "Is not digit", char: 'a', want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := isDigit(tc.char)
			if got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestString calls string directly after consuming the opening quote. An empty
// expected value means that no token must be produced.
func TestString(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   string
	}{
		{name: "Valid string", source: `"hello"`, want: "hello"},
		{name: "Unterminated string", source: `"hello`, want: ""},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			sc := New(tc.source)
			sc.advance() // Step over the opening quote, as scanToken would.
			sc.string()

			count := len(sc.tokens)
			switch {
			case tc.want == "":
				if count != 0 {
					t.Errorf("expected no tokens, got %d", count)
				}
			case count != 1:
				t.Errorf("expected 1 token, got %d", count)
			case sc.tokens[0].Literal != tc.want:
				t.Errorf("expected %v, got %v", tc.want, sc.tokens[0].Literal)
			}
		})
	}
}
// TestNumber calls number on a fresh scanner and checks that exactly one token
// is produced whose literal is the parsed float64.
func TestNumber(t *testing.T) {
	tests := []struct {
		name     string
		source   string
		expected float64
	}{
		// Zero is a regular number: it must yield a token like any other
		// value, not be treated as a "no token expected" marker.
		{"Zero", "0", 0},
		{"Integer number", "123", 123},
		{"Floating point number", "45.67", 45.67},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			scanner := New(tt.source)
			scanner.number()
			if len(scanner.tokens) != 1 {
				t.Fatalf("expected 1 token, got %d", len(scanner.tokens))
			}
			if scanner.tokens[0].Literal != tt.expected {
				t.Errorf("expected %v, got %v", tt.expected, scanner.tokens[0].Literal)
			}
		})
	}
}
// TestScanToken performs a single scanToken call per source and checks the
// type of the first token produced. token.EOF is used as the expected value
// for sources that are allowed to produce no token at all.
func TestScanToken(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   token.TokenType
	}{
		{name: "Left paren", source: "(", want: token.LEFT_PAREN},
		{name: "Right paren", source: ")", want: token.RIGHT_PAREN},
		{name: "Left brace", source: "{", want: token.LEFT_BRACE},
		{name: "Right brace", source: "}", want: token.RIGHT_BRACE},
		{name: "Comma", source: ",", want: token.COMMA},
		{name: "Dot", source: ".", want: token.DOT},
		{name: "Minus", source: "-", want: token.MINUS},
		{name: "Plus", source: "+", want: token.PLUS},
		{name: "Semicolon", source: ";", want: token.SEMICOLON},
		{name: "Star", source: "*", want: token.STAR},
		{name: "Bang", source: "!", want: token.BANG},
		{name: "Bang equal", source: "!=", want: token.BANG_EQUAL},
		{name: "Equal", source: "=", want: token.EQUAL},
		{name: "Equal equal", source: "==", want: token.EQUAL_EQUAL},
		{name: "Less", source: "<", want: token.LESS},
		{name: "Less equal", source: "<=", want: token.LESS_EQUAL},
		{name: "Greater", source: ">", want: token.GREATER},
		{name: "Greater equal", source: ">=", want: token.GREATER_EQUAL},
		{name: "Slash", source: "/", want: token.SLASH},
		{name: "Comment", source: "// comment\n", want: token.EOF},
		{name: "Whitespace", source: " \r\t\n", want: token.EOF},
		{name: "String", source: `"hello"`, want: token.STRING},
		{name: "Number", source: "123", want: token.NUMBER},
		{name: "Identifier", source: "var", want: token.VAR},
		{name: "Unexpected character", source: "@", want: token.EOF},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			sc := New(tc.source)
			sc.scanToken()

			switch {
			case len(sc.tokens) > 0:
				if got := sc.tokens[0].Type; got != tc.want {
					t.Errorf("expected %v, got %v", tc.want, got)
				}
			case tc.want != token.EOF:
				t.Errorf("expected %v, got no tokens", tc.want)
			}
		})
	}
}
// TestIdentifier calls identifier on a fresh scanner and checks that every
// keyword maps to its own token type and any other word to IDENTIFIER.
func TestIdentifier(t *testing.T) {
	cases := []struct {
		name   string
		source string
		want   token.TokenType
	}{
		{name: "Keyword and", source: "and", want: token.AND},
		{name: "Keyword class", source: "class", want: token.CLASS},
		{name: "Keyword else", source: "else", want: token.ELSE},
		{name: "Keyword false", source: "false", want: token.FALSE},
		{name: "Keyword for", source: "for", want: token.FOR},
		{name: "Keyword fun", source: "fun", want: token.FUN},
		{name: "Keyword if", source: "if", want: token.IF},
		{name: "Keyword nil", source: "nil", want: token.NIL},
		{name: "Keyword or", source: "or", want: token.OR},
		{name: "Keyword print", source: "print", want: token.PRINT},
		{name: "Keyword return", source: "return", want: token.RETURN},
		{name: "Keyword super", source: "super", want: token.SUPER},
		{name: "Keyword this", source: "this", want: token.THIS},
		{name: "Keyword true", source: "true", want: token.TRUE},
		{name: "Keyword var", source: "var", want: token.VAR},
		{name: "Keyword while", source: "while", want: token.WHILE},
		{name: "Identifier", source: "myVar", want: token.IDENTIFIER},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			sc := New(tc.source)
			sc.identifier()

			if count := len(sc.tokens); count != 1 {
				t.Fatalf("expected 1 token, got %d", count)
			}
			if got := sc.tokens[0].Type; got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}

@ -0,0 +1,102 @@
package token
// TokenType identifies the kind of a token.
type TokenType int
// Token types. The values are consecutive integers assigned by iota, starting
// at 0 with LEFT_PAREN, in the order listed below.
const (
// Single-character tokens.
LEFT_PAREN TokenType = iota
RIGHT_PAREN
LEFT_BRACE
RIGHT_BRACE
COMMA
DOT
MINUS
PLUS
SEMICOLON
SLASH
STAR
// One or two character tokens.
BANG
BANG_EQUAL
EQUAL
EQUAL_EQUAL
GREATER
GREATER_EQUAL
LESS
LESS_EQUAL
// Literals.
IDENTIFIER
STRING
NUMBER
// Keywords.
AND
CLASS
ELSE
FALSE
FUN
FOR
IF
NIL
OR
PRINT
RETURN
SUPER
THIS
TRUE
VAR
WHILE
// End of input.
EOF
)
// Token represents a token in the source code.
type Token struct {
// Type is the kind of the token.
Type TokenType
// Lexeme is the token's text; String returns it.
Lexeme string
// Literal is the value attached to the token, or nil when there is none.
Literal interface{}
// Line is the source line number recorded for the token.
Line int
}
// New builds a Token from its type, lexeme, literal value and line number.
func New(t TokenType, lexeme string, literal interface{}, line int) Token {
	return Token{
		Type:    t,
		Lexeme:  lexeme,
		Literal: literal,
		Line:    line,
	}
}
// String returns the token's lexeme. The type, literal and line are not
// included in the result.
func (t Token) String() string {
return t.Lexeme
}
// keywords maps each reserved word to its TokenType. LookupKeyword consults
// this table to tell keywords apart from ordinary identifiers.
var keywords = map[string]TokenType{
"and": AND,
"class": CLASS,
"else": ELSE,
"false": FALSE,
"for": FOR,
"fun": FUN,
"if": IF,
"nil": NIL,
"or": OR,
"print": PRINT,
"return": RETURN,
"super": SUPER,
"this": THIS,
"true": TRUE,
"var": VAR,
"while": WHILE,
}
// LookupKeyword returns the keyword TokenType registered for identifier, or
// IDENTIFIER when identifier is not a keyword. The lookup is an exact,
// case-sensitive match.
func LookupKeyword(identifier string) TokenType {
	kind, isKeyword := keywords[identifier]
	if !isKeyword {
		return IDENTIFIER
	}
	return kind
}

@ -0,0 +1,76 @@
package token
import (
"testing"
)
// TestTokenCreation checks that New stores each argument in the matching
// field of the returned Token.
func TestTokenCreation(t *testing.T) {
	cases := []struct {
		tokenType TokenType
		lexeme    string
		literal   interface{}
		line      int
	}{
		{tokenType: LEFT_PAREN, lexeme: "(", literal: nil, line: 1},
		{tokenType: RIGHT_PAREN, lexeme: ")", literal: nil, line: 1},
		{tokenType: IDENTIFIER, lexeme: "foo", literal: nil, line: 1},
		{tokenType: STRING, lexeme: "\"bar\"", literal: "bar", line: 1},
		{tokenType: NUMBER, lexeme: "123", literal: 123, line: 1},
	}

	for _, tc := range cases {
		got := New(tc.tokenType, tc.lexeme, tc.literal, tc.line)

		if got.Type != tc.tokenType {
			t.Errorf("expected token type %v, got %v", tc.tokenType, got.Type)
		}
		if got.Lexeme != tc.lexeme {
			t.Errorf("expected lexeme %v, got %v", tc.lexeme, got.Lexeme)
		}
		if got.Literal != tc.literal {
			t.Errorf("expected literal %v, got %v", tc.literal, got.Literal)
		}
		if got.Line != tc.line {
			t.Errorf("expected line %v, got %v", tc.line, got.Line)
		}
	}
}
// TestTokenString checks that String returns the token's lexeme.
func TestTokenString(t *testing.T) {
	const expected = "foo"

	tok := New(IDENTIFIER, expected, nil, 1)
	if got := tok.String(); got != expected {
		t.Errorf("expected %v, got %v", expected, got)
	}
}
// TestLookupKeyword checks that every keyword resolves to its own token type
// and that any other word resolves to IDENTIFIER.
func TestLookupKeyword(t *testing.T) {
	cases := []struct {
		identifier string
		want       TokenType
	}{
		{identifier: "and", want: AND},
		{identifier: "class", want: CLASS},
		{identifier: "else", want: ELSE},
		{identifier: "false", want: FALSE},
		{identifier: "for", want: FOR},
		{identifier: "fun", want: FUN},
		{identifier: "if", want: IF},
		{identifier: "nil", want: NIL},
		{identifier: "or", want: OR},
		{identifier: "print", want: PRINT},
		{identifier: "return", want: RETURN},
		{identifier: "super", want: SUPER},
		{identifier: "this", want: THIS},
		{identifier: "true", want: TRUE},
		{identifier: "var", want: VAR},
		{identifier: "while", want: WHILE},
		{identifier: "foobar", want: IDENTIFIER},
	}

	for _, tc := range cases {
		if got := LookupKeyword(tc.identifier); got != tc.want {
			t.Errorf("expected %v, got %v", tc.want, got)
		}
	}
}
Loading…
Cancel
Save