You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

247 lines
5.4 KiB
Go

package scanner
import (
"golox/errors"
"golox/token"
"strconv"
)
// Scanner turns source text into a list of tokens. It remembers where the
// lexeme being scanned begins, how far scanning has progressed, which line it
// is on, and the tokens produced so far.
type Scanner struct {
	// source is the complete text being scanned.
	source string

	// start is the offset in source of the first byte of the lexeme being
	// scanned; current is the offset of the next byte to be consumed.
	start, current int

	// line is the line number attached to emitted tokens and reported errors.
	line int

	// tokens collects the tokens emitted so far.
	tokens []token.Token

	// errLogger receives the errors found while scanning.
	errLogger errors.Logger
}
// New returns a Scanner positioned at the first byte of source, on line 1,
// with an empty token list. Errors found while scanning are reported
// through el.
func New(source string, el errors.Logger) *Scanner {
	s := new(Scanner) // start and current keep their zero value: offset 0
	s.source = source
	s.errLogger = el
	s.line = 1
	s.tokens = []token.Token{}
	return s
}
// ScanTokens scans from the current position to the end of the source and
// returns every token collected so far, followed by a closing EOF token that
// carries an empty lexeme, a nil literal and the current line number.
func (s *Scanner) ScanTokens() []token.Token {
	for {
		if s.isAtEnd() {
			break
		}
		// The next lexeme begins where the previous one stopped.
		s.start = s.current
		s.scanToken()
	}

	eof := token.New(token.EOF, "", nil, s.line)
	s.tokens = append(s.tokens, eof)
	return s.tokens
}
// isAtEnd reports whether every byte of the source has been consumed.
func (s *Scanner) isAtEnd() bool {
	remaining := len(s.source) - s.current
	return remaining <= 0
}
// scanToken consumes one lexeme starting at s.current and, when that lexeme
// forms a token, appends the token to s.tokens.
//
// Spaces, tabs, carriage returns and "//" line comments produce no token, and
// a newline only advances the line counter. A character that cannot start any
// token is reported through the error logger as "Unexpected character.";
// scanning then carries on with whatever follows it.
//
// The source is read byte by byte, so a non-ASCII character outside a string
// or comment arrives here as several bytes. All bytes of such a character are
// consumed together so that it is reported once instead of once per byte.
func (s *Scanner) scanToken() {
	c := s.advance()
	switch c {
	case '(':
		s.addToken(token.LEFT_PAREN)
	case ')':
		s.addToken(token.RIGHT_PAREN)
	case '{':
		s.addToken(token.LEFT_BRACE)
	case '}':
		s.addToken(token.RIGHT_BRACE)
	case ',':
		s.addToken(token.COMMA)
	case '.':
		s.addToken(token.DOT)
	case '-':
		s.addToken(token.MINUS)
	case '+':
		s.addToken(token.PLUS)
	case ';':
		s.addToken(token.SEMICOLON)
	case '*':
		s.addToken(token.STAR)
	// The next four operators are one or two bytes long: a following '='
	// is consumed by match and selects the two-byte token.
	case '!':
		if s.match('=') {
			s.addToken(token.BANG_EQUAL)
		} else {
			s.addToken(token.BANG)
		}
	case '=':
		if s.match('=') {
			s.addToken(token.EQUAL_EQUAL)
		} else {
			s.addToken(token.EQUAL)
		}
	case '<':
		if s.match('=') {
			s.addToken(token.LESS_EQUAL)
		} else {
			s.addToken(token.LESS)
		}
	case '>':
		if s.match('=') {
			s.addToken(token.GREATER_EQUAL)
		} else {
			s.addToken(token.GREATER)
		}
	case '/':
		if s.match('/') {
			// A comment runs to the end of the line. The newline itself is
			// left in place so the '\n' case below still counts the line.
			for s.peek() != '\n' && !s.isAtEnd() {
				s.advance()
			}
		} else {
			s.addToken(token.SLASH)
		}
	case ' ', '\r', '\t':
		// Ignore whitespace.
	case '\n':
		s.line++
	case '"':
		s.string()
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		s.number()
	default:
		if isAlpha(c) {
			s.identifier()
			return
		}
		if c >= 0x80 {
			// c belongs to a multi-byte UTF-8 sequence. Continuation bytes
			// have the bit pattern 10xxxxxx; swallow the ones that follow so
			// the whole character yields a single diagnostic.
			for !s.isAtEnd() && s.peek()&0xC0 == 0x80 {
				s.advance()
			}
		}
		s.errLogger.Error(s.line, "Unexpected character.")
	}
}
// identifier consumes the remaining letters, underscores and digits of the
// current lexeme and adds a token whose type is chosen by
// token.LookupKeyword from the lexeme text.
func (s *Scanner) identifier() {
	for {
		next := s.peek()
		if !isAlpha(next) && !isDigit(next) {
			break
		}
		s.advance()
	}

	lexeme := s.source[s.start:s.current]
	s.addToken(token.LookupKeyword(lexeme))
}
// number consumes a numeric lexeme (digits, optionally followed by a "." and
// more digits) and adds a NUMBER token whose literal is the float64 value of
// the lexeme. If the lexeme cannot be parsed, an error is logged and no token
// is added.
func (s *Scanner) number() {
	skipDigits := func() {
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	skipDigits()

	// A "." only belongs to the number when a digit comes right after it.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // the "."
		skipDigits()
	}

	lexeme := s.source[s.start:s.current]
	value, err := strconv.ParseFloat(lexeme, 64)
	if err != nil {
		s.errLogger.Error(s.line, "Could not parse number.")
		return
	}
	s.addTokenLiteral(token.NUMBER, value)
}
// string consumes a string literal whose opening quote has already been
// consumed and adds a STRING token whose literal is the text between the
// quotes. Newlines inside the literal advance the line counter. If the source
// ends before a closing quote is found, an error is logged and no token is
// added.
func (s *Scanner) string() {
	for !s.isAtEnd() && s.peek() != '"' {
		if s.advance() == '\n' {
			s.line++
		}
	}

	if s.isAtEnd() {
		s.errLogger.Error(s.line, "Unterminated string.")
		return
	}

	s.advance() // the closing quote

	// Drop the quote on either side of the lexeme.
	contents := s.source[s.start+1 : s.current-1]
	s.addTokenLiteral(token.STRING, contents)
}
// match reports whether the next unconsumed byte equals expected. The byte is
// consumed only when it matches; otherwise, or when the source is exhausted,
// the scanner is left untouched.
func (s *Scanner) match(expected byte) bool {
	if s.isAtEnd() || s.source[s.current] != expected {
		return false
	}
	s.current++
	return true
}
// peek returns the next unconsumed byte without consuming it, or a zero byte
// when the source is exhausted.
func (s *Scanner) peek() byte {
	if !s.isAtEnd() {
		return s.source[s.current]
	}
	return '\x00'
}
// peekNext returns the byte one past the next unconsumed byte without
// consuming anything, or a zero byte when the source has no such byte.
func (s *Scanner) peekNext() byte {
	next := s.current + 1
	if next < len(s.source) {
		return s.source[next]
	}
	return '\x00'
}
// isAlpha reports whether c is an ASCII letter or an underscore.
func isAlpha(c byte) bool {
	switch {
	case c == '_':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
// advance consumes the next byte of the source and returns it. It does no
// bounds check of its own, so it must only be called while unconsumed input
// remains.
func (s *Scanner) advance() byte {
	pos := s.current
	ch := s.source[pos]
	s.current = pos + 1
	return ch
}
// addToken appends a token of type t for the current lexeme, with no literal
// value attached.
func (s *Scanner) addToken(t token.TokenType) {
	var noLiteral interface{} // nil: this token carries no literal
	s.addTokenLiteral(t, noLiteral)
}
// addTokenLiteral appends a token of type t carrying the given literal value.
// The token's lexeme is the source text from s.start up to, but not
// including, s.current, and its line is the scanner's current line.
func (s *Scanner) addTokenLiteral(t token.TokenType, literal interface{}) {
	lexeme := s.source[s.start:s.current]
	tok := token.New(t, lexeme, literal, s.line)
	s.tokens = append(s.tokens, tok)
}