126 lines
3.1 KiB
Go
Raw Normal View History

package lexer
import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"text/scanner"
"unicode/utf8"
)
// TextScannerLexer is a lexer that uses the text/scanner module.
var (
TextScannerLexer Definition = &defaultDefinition{}
// DefaultDefinition defines properties for the default lexer.
DefaultDefinition = TextScannerLexer
)
type defaultDefinition struct{}
func (d *defaultDefinition) Lex(r io.Reader) (Lexer, error) {
return Lex(r), nil
}
func (d *defaultDefinition) Symbols() map[string]rune {
return map[string]rune{
"EOF": scanner.EOF,
"Char": scanner.Char,
"Ident": scanner.Ident,
"Int": scanner.Int,
"Float": scanner.Float,
"String": scanner.String,
"RawString": scanner.RawString,
"Comment": scanner.Comment,
}
}
// textScannerLexer is a Lexer based on text/scanner.Scanner
type textScannerLexer struct {
scanner *scanner.Scanner
filename string
err error
}
// Lex an io.Reader with text/scanner.Scanner.
//
// This provides very fast lexing of source code compatible with Go tokens.
//
// Note that this differs from text/scanner.Scanner in that string tokens will be unquoted.
func Lex(r io.Reader) Lexer {
lexer := lexWithScanner(r, &scanner.Scanner{})
lexer.scanner.Error = func(s *scanner.Scanner, msg string) {
// This is to support single quoted strings. Hacky.
if msg != "illegal char literal" {
lexer.err = Errorf(Position(lexer.scanner.Pos()), msg)
}
}
return lexer
}
// LexWithScanner creates a Lexer from a user-provided scanner.Scanner.
//
// Useful if you need to customise the Scanner.
func LexWithScanner(r io.Reader, scan *scanner.Scanner) Lexer {
return lexWithScanner(r, scan)
}
func lexWithScanner(r io.Reader, scan *scanner.Scanner) *textScannerLexer {
lexer := &textScannerLexer{
filename: NameOfReader(r),
scanner: scan,
}
lexer.scanner.Init(r)
return lexer
}
// LexBytes returns a new default lexer over bytes.
func LexBytes(b []byte) Lexer {
return Lex(bytes.NewReader(b))
}
// LexString returns a new default lexer over a string.
func LexString(s string) Lexer {
return Lex(strings.NewReader(s))
}
func (t *textScannerLexer) Next() (Token, error) {
typ := t.scanner.Scan()
text := t.scanner.TokenText()
pos := Position(t.scanner.Position)
pos.Filename = t.filename
if t.err != nil {
return Token{}, t.err
}
return textScannerTransform(Token{
Type: typ,
Value: text,
Pos: pos,
})
}
func textScannerTransform(token Token) (Token, error) {
// Unquote strings.
switch token.Type {
case scanner.Char:
// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
// quoted string in order to support single quoted strings.
token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1])
fallthrough
case scanner.String:
s, err := strconv.Unquote(token.Value)
if err != nil {
return Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
}
token.Value = s
if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
token.Type = scanner.String
}
case scanner.RawString:
token.Value = token.Value[1 : len(token.Value)-1]
}
return token, nil
}