// golox/scanner/scanner.go

package scanner

import (
	"golox/errors"
	"golox/token"
	"strconv"
)

// Scanner walks over a source string byte by byte and accumulates the tokens
// it recognises, reporting problems through an error logger.
type Scanner struct {
	// source is the complete text being scanned.
	source string
	// start is the byte offset at which the lexeme currently being scanned
	// begins; current is the byte offset of the next byte to be read.
	start, current int
	// line is the line number attached to emitted tokens and reported
	// errors; it is incremented each time a newline is consumed.
	line int
	// tokens holds every token produced so far, in source order.
	tokens []token.Token
	// errLogger receives scan errors together with the line they occurred on.
	errLogger errors.Logger
}

// New returns a Scanner positioned at the beginning of source. Line counting
// starts at 1, the token list starts out empty, and scan errors are reported
// through el.
func New(source string, el errors.Logger) *Scanner {
	s := new(Scanner) // start and current keep their zero value: offset 0.
	s.source = source
	s.line = 1
	s.tokens = []token.Token{}
	s.errLogger = el
	return s
}

// ScanTokens scans the remaining source from the current position to its end
// and returns the accumulated tokens, terminated by an EOF token that carries
// an empty lexeme, a nil literal and the final line number.
func (s *Scanner) ScanTokens() []token.Token {
	for {
		if s.isAtEnd() {
			break
		}
		// Each pass starts a fresh lexeme at the current offset.
		s.start = s.current
		s.scanToken()
	}

	eof := token.New(token.EOF, "", nil, s.line)
	s.tokens = append(s.tokens, eof)
	return s.tokens
}

// isAtEnd reports whether every byte of the source has been consumed.
func (s *Scanner) isAtEnd() bool {
	remaining := len(s.source) - s.current
	return remaining <= 0
}

// scanToken consumes one lexeme starting at the current offset and, when that
// lexeme forms a token, appends it to the token list. Whitespace and "//"
// comments produce no token, a newline only increments the line counter, and
// any byte that cannot start a token is reported to the error logger.
func (s *Scanner) scanToken() {
	// oneOrTwo picks the token type for an operator that may be followed by
	// a second byte (for example "!" versus "!="). match consumes that second
	// byte when it is present, so the lexeme covers both bytes.
	oneOrTwo := func(second byte, long, short token.TokenType) token.TokenType {
		if s.match(second) {
			return long
		}
		return short
	}

	switch c := s.advance(); c {
	// Single-byte punctuation and operators.
	case '(':
		s.addToken(token.LEFT_PAREN)
	case ')':
		s.addToken(token.RIGHT_PAREN)
	case '{':
		s.addToken(token.LEFT_BRACE)
	case '}':
		s.addToken(token.RIGHT_BRACE)
	case ',':
		s.addToken(token.COMMA)
	case '.':
		s.addToken(token.DOT)
	case '-':
		s.addToken(token.MINUS)
	case '+':
		s.addToken(token.PLUS)
	case ';':
		s.addToken(token.SEMICOLON)
	case '*':
		s.addToken(token.STAR)

	// Operators that may be extended by a trailing '='.
	case '!':
		s.addToken(oneOrTwo('=', token.BANG_EQUAL, token.BANG))
	case '=':
		s.addToken(oneOrTwo('=', token.EQUAL_EQUAL, token.EQUAL))
	case '<':
		s.addToken(oneOrTwo('=', token.LESS_EQUAL, token.LESS))
	case '>':
		s.addToken(oneOrTwo('=', token.GREATER_EQUAL, token.GREATER))

	case '/':
		if !s.match('/') {
			s.addToken(token.SLASH)
			break
		}
		// A "//" comment runs to the end of the line. The newline itself is
		// left unread so a later call still counts it.
		for !s.isAtEnd() && s.peek() != '\n' {
			s.advance()
		}

	case '\n':
		s.line++
	case ' ', '\r', '\t':
		// Whitespace is skipped without producing a token.

	case '"':
		s.string()

	default:
		switch {
		case isDigit(c):
			s.number()
		case isAlpha(c):
			s.identifier()
		default:
			s.errLogger.Error(s.line, "Unexpected character.")
		}
	}
}

// identifier consumes the rest of a run of letters, digits and underscores
// and adds a token for it. The token type is whatever token.LookupKeyword
// returns for the lexeme (presumably a keyword type for reserved words and an
// identifier type otherwise; see the token package).
func (s *Scanner) identifier() {
	for {
		next := s.peek()
		if !isAlpha(next) && !isDigit(next) {
			break
		}
		s.advance()
	}

	lexeme := s.source[s.start:s.current]
	s.addToken(token.LookupKeyword(lexeme))
}

// number consumes a numeric literal (digits with an optional fractional
// part) and adds a NUMBER token whose literal is the parsed float64. If the
// lexeme cannot be parsed, an error is logged and no token is added.
func (s *Scanner) number() {
	// skipDigits consumes a (possibly empty) run of decimal digits.
	skipDigits := func() {
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	skipDigits()

	// A '.' belongs to the number only when a digit follows it; otherwise it
	// is left unread for the next token.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // the "."
		skipDigits()
	}

	lexeme := s.source[s.start:s.current]
	value, err := strconv.ParseFloat(lexeme, 64)
	if err != nil {
		s.errLogger.Error(s.line, "Could not parse number.")
		return
	}

	s.addTokenLiteral(token.NUMBER, value)
}

// string consumes a double-quoted string literal whose opening quote has
// already been read, and adds a STRING token whose literal is the text between
// the quotes. Strings may span lines; each embedded newline increments the
// line counter. Reaching the end of the source before the closing quote logs
// an error and adds no token.
func (s *Scanner) string() {
	for !s.isAtEnd() {
		ch := s.peek()
		if ch == '"' {
			break
		}
		if ch == '\n' {
			s.line++
		}
		s.advance()
	}

	if s.isAtEnd() {
		s.errLogger.Error(s.line, "Unterminated string.")
		return
	}

	s.advance() // the closing quote

	// Drop the opening and closing quotes from the literal value; the lexeme
	// recorded by addTokenLiteral still includes them.
	first, last := s.start+1, s.current-1
	s.addTokenLiteral(token.STRING, s.source[first:last])
}

// match is a conditional advance: when the next unread byte equals expected
// it is consumed and match returns true. At the end of the source, or when
// the byte differs, nothing is consumed and match returns false.
func (s *Scanner) match(expected byte) bool {
	if s.isAtEnd() || s.source[s.current] != expected {
		return false
	}

	s.current++
	return true
}

// peek returns the next unread byte without consuming it, or the NUL byte
// when the end of the source has been reached.
func (s *Scanner) peek() byte {
	if !s.isAtEnd() {
		return s.source[s.current]
	}

	return '\000'
}

// peekNext returns the byte one past the next unread byte without consuming
// anything, or the NUL byte when that position lies beyond the source.
func (s *Scanner) peekNext() byte {
	if next := s.current + 1; next < len(s.source) {
		return s.source[next]
	}

	return '\000'
}

// isAlpha reports whether c is an ASCII letter (a-z or A-Z) or an underscore.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}

// isDigit reports whether c is an ASCII decimal digit (0-9).
func isDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}

// advance consumes the next unread byte: it returns the byte at the current
// offset and moves the offset one past it. There is no bounds check here, so
// callers must make sure the scanner is not at the end of the source.
func (s *Scanner) advance() byte {
	pos := s.current
	ch := s.source[pos]
	s.current = pos + 1
	return ch
}

// addToken appends a token of type t for the current lexeme, with no literal
// value attached.
func (s *Scanner) addToken(t token.TokenType) {
	var noLiteral interface{} // nil: this token carries no literal value
	s.addTokenLiteral(t, noLiteral)
}

// addTokenLiteral appends a token of type t carrying the given literal value.
// The token's lexeme is the source text from the start of the current lexeme
// up to, but not including, the current offset, and its line is the scanner's
// current line.
func (s *Scanner) addTokenLiteral(t token.TokenType, literal interface{}) {
	lexeme := s.source[s.start:s.current] // half-open range [start, current)
	tok := token.New(t, lexeme, literal, s.line)
	s.tokens = append(s.tokens, tok)
}