package scanner

import (
	"golox/errors"
	"golox/token"
	"strconv"
)

// Scanner holds the source code, the start and current positions of the
// scanner, the current line, and the tokens that have been scanned.
type Scanner struct {
	source    string
	start     int
	current   int
	line      int
	tokens    []token.Token
	errLogger errors.Logger
}

// New creates a new Scanner for the given source code.
func New(source string, el errors.Logger) *Scanner {
	return &Scanner{
		source:    source,          // The source code to scan.
		start:     0,               // The start position of the current lexeme.
		current:   0,               // The current position of the scanner.
		line:      1,               // The current line number.
		tokens:    []token.Token{}, // The tokens that have been scanned.
		errLogger: el,              // The error logger.
	}
}

// ScanTokens scans all tokens in the source code and appends a final EOF token.
func (s *Scanner) ScanTokens() []token.Token {
	for !s.isAtEnd() {
		s.start = s.current
		s.scanToken()
	}

	s.tokens = append(s.tokens, token.New(token.EOF, "", nil, s.line))
	return s.tokens
}

// isAtEnd returns true if the scanner has reached the end of the source code.
func (s *Scanner) isAtEnd() bool {
	return s.current >= len(s.source)
}

// scanToken scans the next token in the source code.
func (s *Scanner) scanToken() {
	c := s.advance()
	switch c {
	case '(':
		s.addToken(token.LEFT_PAREN)
	case ')':
		s.addToken(token.RIGHT_PAREN)
	case '{':
		s.addToken(token.LEFT_BRACE)
	case '}':
		s.addToken(token.RIGHT_BRACE)
	case ',':
		s.addToken(token.COMMA)
	case '.':
		s.addToken(token.DOT)
	case '-':
		s.addToken(token.MINUS)
	case '+':
		s.addToken(token.PLUS)
	case ';':
		s.addToken(token.SEMICOLON)
	case '*':
		s.addToken(token.STAR)
	case '!':
		if s.match('=') {
			s.addToken(token.BANG_EQUAL)
		} else {
			s.addToken(token.BANG)
		}
	case '=':
		if s.match('=') {
			s.addToken(token.EQUAL_EQUAL)
		} else {
			s.addToken(token.EQUAL)
		}
	case '<':
		if s.match('=') {
			s.addToken(token.LESS_EQUAL)
		} else {
			s.addToken(token.LESS)
		}
	case '>':
		if s.match('=') {
			s.addToken(token.GREATER_EQUAL)
		} else {
			s.addToken(token.GREATER)
		}
	case '/':
		if s.match('/') {
			// A comment goes until the end of the line.
			for s.peek() != '\n' && !s.isAtEnd() {
				s.advance()
			}
		} else {
			s.addToken(token.SLASH)
		}
	case ' ', '\r', '\t':
		// Ignore whitespace.
	case '\n':
		s.line++
	case '"':
		s.string()
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		s.number()
	default:
		if isAlpha(c) {
			s.identifier()
		} else {
			s.errLogger.Error(s.line, "Unexpected character.")
		}
	}
}

// identifier scans an identifier token.
func (s *Scanner) identifier() {
	for isAlpha(s.peek()) || isDigit(s.peek()) {
		s.advance()
	}

	text := s.source[s.start:s.current]
	// Get the token type for the identifier (keyword or identifier).
	t := token.LookupKeyword(text)
	s.addToken(t)
}

// number scans a number token.
func (s *Scanner) number() {
	for isDigit(s.peek()) {
		s.advance()
	}

	// Look for a fractional part.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		// Consume the ".".
		s.advance()

		for isDigit(s.peek()) {
			s.advance()
		}
	}

	f, err := strconv.ParseFloat(s.source[s.start:s.current], 64)
	if err != nil {
		s.errLogger.Error(s.line, "Could not parse number.")
		return
	}
	s.addTokenLiteral(token.NUMBER, f)
}

// string scans a string token.
func (s *Scanner) string() {
	for s.peek() != '"' && !s.isAtEnd() {
		if s.peek() == '\n' {
			s.line++
		}
		s.advance()
	}

	if s.isAtEnd() {
		s.errLogger.Error(s.line, "Unterminated string.")
		return
	}

	// The closing ".
	s.advance()

	// Trim the surrounding quotes.
	value := s.source[s.start+1 : s.current-1]
	s.addTokenLiteral(token.STRING, value)
}

// match returns true if the current character matches the expected character.
// If it matches, the character is consumed; otherwise there is no side effect.
func (s *Scanner) match(expected byte) bool {
	if s.isAtEnd() {
		return false
	}
	if s.source[s.current] != expected {
		return false
	}

	s.current++
	return true
}

// peek returns the character at the current position without consuming it.
func (s *Scanner) peek() byte {
	if s.isAtEnd() {
		return '\000'
	}
	return s.source[s.current]
}

// peekNext returns the character at the next position without consuming it.
func (s *Scanner) peekNext() byte {
	if s.current+1 >= len(s.source) {
		return '\000'
	}
	return s.source[s.current+1]
}

// isAlpha returns true if the character is an alphabetic character or an underscore.
func isAlpha(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		c == '_'
}

// isDigit returns true if the character is a digit.
func isDigit(c byte) bool {
	return c >= '0' && c <= '9'
}

// advance returns the character at the current position and then
// increments the current position of the scanner.
func (s *Scanner) advance() byte {
	c := s.source[s.current]
	s.current++
	return c
}

// addToken adds a token without a literal value to the list of tokens.
func (s *Scanner) addToken(t token.TokenType) {
	s.addTokenLiteral(t, nil)
}

// addTokenLiteral adds a token with a literal value to the list of tokens.
func (s *Scanner) addTokenLiteral(t token.TokenType, literal interface{}) {
	// s.source[s.start:s.current] is a half-open range: it includes the byte at
	// s.start but excludes the byte at s.current.
	text := s.source[s.start:s.current]
	s.tokens = append(s.tokens, token.New(t, text, literal, s.line))
}
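
// Usage sketch (not part of the scanner itself): the snippet below shows how a
// caller might drive the scanner. It assumes errors.Logger is satisfied by a
// type with an Error(line int, message string) method, matching the calls made
// above, and that token.Token values print reasonably via fmt verbs; both are
// assumptions about the golox/errors and golox/token packages, not facts
// established in this file. consoleLogger is a hypothetical implementation.
//
//	var logger errors.Logger = &consoleLogger{}
//	s := scanner.New(`print "hi"; // greeting`, logger)
//	for _, tok := range s.ScanTokens() {
//		fmt.Printf("%+v\n", tok)
//	}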