diff options
Diffstat (limited to 'syntax/scan.go')
-rw-r--r-- | syntax/scan.go | 1123 |
1 files changed, 1123 insertions, 0 deletions
diff --git a/syntax/scan.go b/syntax/scan.go new file mode 100644 index 0000000..bb4165e --- /dev/null +++ b/syntax/scan.go @@ -0,0 +1,1123 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// A lexical scanner for Starlark. + +import ( + "fmt" + "io" + "io/ioutil" + "log" + "math/big" + "os" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A Token represents a Starlark lexical token. +type Token int8 + +const ( + ILLEGAL Token = iota + EOF + + NEWLINE + INDENT + OUTDENT + + // Tokens with values + IDENT // x + INT // 123 + FLOAT // 1.23e45 + STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo" + BYTES // b"foo", etc + + // Punctuation + PLUS // + + MINUS // - + STAR // * + SLASH // / + SLASHSLASH // // + PERCENT // % + AMP // & + PIPE // | + CIRCUMFLEX // ^ + LTLT // << + GTGT // >> + TILDE // ~ + DOT // . + COMMA // , + EQ // = + SEMI // ; + COLON // : + LPAREN // ( + RPAREN // ) + LBRACK // [ + RBRACK // ] + LBRACE // { + RBRACE // } + LT // < + GT // > + GE // >= + LE // <= + EQL // == + NEQ // != + PLUS_EQ // += (keep order consistent with PLUS..GTGT) + MINUS_EQ // -= + STAR_EQ // *= + SLASH_EQ // /= + SLASHSLASH_EQ // //= + PERCENT_EQ // %= + AMP_EQ // &= + PIPE_EQ // |= + CIRCUMFLEX_EQ // ^= + LTLT_EQ // <<= + GTGT_EQ // >>= + STARSTAR // ** + + // Keywords + AND + BREAK + CONTINUE + DEF + ELIF + ELSE + FOR + IF + IN + LAMBDA + LOAD + NOT + NOT_IN // synthesized by parser from NOT IN + OR + PASS + RETURN + WHILE + + maxToken +) + +func (tok Token) String() string { return tokenNames[tok] } + +// GoString is like String but quotes punctuation tokens. +// Use Sprintf("%#v", tok) when constructing error messages. +func (tok Token) GoString() string { + if tok >= PLUS && tok <= STARSTAR { + return "'" + tokenNames[tok] + "'" + } + return tokenNames[tok] +} + +var tokenNames = [...]string{ + ILLEGAL: "illegal token", + EOF: "end of file", + NEWLINE: "newline", + INDENT: "indent", + OUTDENT: "outdent", + IDENT: "identifier", + INT: "int literal", + FLOAT: "float literal", + STRING: "string literal", + PLUS: "+", + MINUS: "-", + STAR: "*", + SLASH: "/", + SLASHSLASH: "//", + PERCENT: "%", + AMP: "&", + PIPE: "|", + CIRCUMFLEX: "^", + LTLT: "<<", + GTGT: ">>", + TILDE: "~", + DOT: ".", + COMMA: ",", + EQ: "=", + SEMI: ";", + COLON: ":", + LPAREN: "(", + RPAREN: ")", + LBRACK: "[", + RBRACK: "]", + LBRACE: "{", + RBRACE: "}", + LT: "<", + GT: ">", + GE: ">=", + LE: "<=", + EQL: "==", + NEQ: "!=", + PLUS_EQ: "+=", + MINUS_EQ: "-=", + STAR_EQ: "*=", + SLASH_EQ: "/=", + SLASHSLASH_EQ: "//=", + PERCENT_EQ: "%=", + AMP_EQ: "&=", + PIPE_EQ: "|=", + CIRCUMFLEX_EQ: "^=", + LTLT_EQ: "<<=", + GTGT_EQ: ">>=", + STARSTAR: "**", + AND: "and", + BREAK: "break", + CONTINUE: "continue", + DEF: "def", + ELIF: "elif", + ELSE: "else", + FOR: "for", + IF: "if", + IN: "in", + LAMBDA: "lambda", + LOAD: "load", + NOT: "not", + NOT_IN: "not in", + OR: "or", + PASS: "pass", + RETURN: "return", + WHILE: "while", +} + +// A FilePortion describes the content of a portion of a file. +// Callers may provide a FilePortion for the src argument of Parse +// when the desired initial line and column numbers are not (1, 1), +// such as when an expression is parsed from within larger file. +type FilePortion struct { + Content []byte + FirstLine, FirstCol int32 +} + +// A Position describes the location of a rune of input. +type Position struct { + file *string // filename (indirect for compactness) + Line int32 // 1-based line number; 0 if line unknown + Col int32 // 1-based column (rune) number; 0 if column unknown +} + +// IsValid reports whether the position is valid. +func (p Position) IsValid() bool { return p.file != nil } + +// Filename returns the name of the file containing this position. +func (p Position) Filename() string { + if p.file != nil { + return *p.file + } + return "<invalid>" +} + +// MakePosition returns position with the specified components. +func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} } + +// add returns the position at the end of s, assuming it starts at p. +func (p Position) add(s string) Position { + if n := strings.Count(s, "\n"); n > 0 { + p.Line += int32(n) + s = s[strings.LastIndex(s, "\n")+1:] + p.Col = 1 + } + p.Col += int32(utf8.RuneCountInString(s)) + return p +} + +func (p Position) String() string { + file := p.Filename() + if p.Line > 0 { + if p.Col > 0 { + return fmt.Sprintf("%s:%d:%d", file, p.Line, p.Col) + } + return fmt.Sprintf("%s:%d", file, p.Line) + } + return file +} + +func (p Position) isBefore(q Position) bool { + if p.Line != q.Line { + return p.Line < q.Line + } + return p.Col < q.Col +} + +// An scanner represents a single input file being parsed. +type scanner struct { + rest []byte // rest of input (in REPL, a line of input) + token []byte // token being scanned + pos Position // current input position + depth int // nesting of [ ] { } ( ) + indentstk []int // stack of indentation levels + dents int // number of saved INDENT (>0) or OUTDENT (<0) tokens to return + lineStart bool // after NEWLINE; convert spaces to indentation tokens + keepComments bool // accumulate comments in slice + lineComments []Comment // list of full line comments (if keepComments) + suffixComments []Comment // list of suffix comments (if keepComments) + + readline func() ([]byte, error) // read next line of input (REPL only) +} + +func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) { + var firstLine, firstCol int32 = 1, 1 + if portion, ok := src.(FilePortion); ok { + firstLine, firstCol = portion.FirstLine, portion.FirstCol + } + sc := &scanner{ + pos: MakePosition(&filename, firstLine, firstCol), + indentstk: make([]int, 1, 10), // []int{0} + spare capacity + lineStart: true, + keepComments: keepComments, + } + sc.readline, _ = src.(func() ([]byte, error)) // ParseCompoundStmt (REPL) only + if sc.readline == nil { + data, err := readSource(filename, src) + if err != nil { + return nil, err + } + sc.rest = data + } + return sc, nil +} + +func readSource(filename string, src interface{}) ([]byte, error) { + switch src := src.(type) { + case string: + return []byte(src), nil + case []byte: + return src, nil + case io.Reader: + data, err := ioutil.ReadAll(src) + if err != nil { + err = &os.PathError{Op: "read", Path: filename, Err: err} + return nil, err + } + return data, nil + case FilePortion: + return src.Content, nil + case nil: + return ioutil.ReadFile(filename) + default: + return nil, fmt.Errorf("invalid source: %T", src) + } +} + +// An Error describes the nature and position of a scanner or parser error. +type Error struct { + Pos Position + Msg string +} + +func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg } + +// errorf is called to report an error. +// errorf does not return: it panics. +func (sc *scanner) error(pos Position, s string) { + panic(Error{pos, s}) +} + +func (sc *scanner) errorf(pos Position, format string, args ...interface{}) { + sc.error(pos, fmt.Sprintf(format, args...)) +} + +func (sc *scanner) recover(err *error) { + // The scanner and parser panic both for routine errors like + // syntax errors and for programmer bugs like array index + // errors. Turn both into error returns. Catching bug panics + // is especially important when processing many files. + switch e := recover().(type) { + case nil: + // no panic + case Error: + *err = e + default: + *err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)} + if debug { + log.Fatal(*err) + } + } +} + +// eof reports whether the input has reached end of file. +func (sc *scanner) eof() bool { + return len(sc.rest) == 0 && !sc.readLine() +} + +// readLine attempts to read another line of input. +// Precondition: len(sc.rest)==0. +func (sc *scanner) readLine() bool { + if sc.readline != nil { + var err error + sc.rest, err = sc.readline() + if err != nil { + sc.errorf(sc.pos, "%v", err) // EOF or ErrInterrupt + } + return len(sc.rest) > 0 + } + return false +} + +// peekRune returns the next rune in the input without consuming it. +// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'. +func (sc *scanner) peekRune() rune { + // TODO(adonovan): opt: measure and perhaps inline eof. + if sc.eof() { + return 0 + } + + // fast path: ASCII + if b := sc.rest[0]; b < utf8.RuneSelf { + if b == '\r' { + return '\n' + } + return rune(b) + } + + r, _ := utf8.DecodeRune(sc.rest) + return r +} + +// readRune consumes and returns the next rune in the input. +// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'. +func (sc *scanner) readRune() rune { + // eof() has been inlined here, both to avoid a call + // and to establish len(rest)>0 to avoid a bounds check. + if len(sc.rest) == 0 { + if !sc.readLine() { + sc.error(sc.pos, "internal scanner error: readRune at EOF") + } + // Redundant, but eliminates the bounds-check below. + if len(sc.rest) == 0 { + return 0 + } + } + + // fast path: ASCII + if b := sc.rest[0]; b < utf8.RuneSelf { + r := rune(b) + sc.rest = sc.rest[1:] + if r == '\r' { + if len(sc.rest) > 0 && sc.rest[0] == '\n' { + sc.rest = sc.rest[1:] + } + r = '\n' + } + if r == '\n' { + sc.pos.Line++ + sc.pos.Col = 1 + } else { + sc.pos.Col++ + } + return r + } + + r, size := utf8.DecodeRune(sc.rest) + sc.rest = sc.rest[size:] + sc.pos.Col++ + return r +} + +// tokenValue records the position and value associated with each token. +type tokenValue struct { + raw string // raw text of token + int int64 // decoded int + bigInt *big.Int // decoded integers > int64 + float float64 // decoded float + string string // decoded string or bytes + pos Position // start position of token +} + +// startToken marks the beginning of the next input token. +// It must be followed by a call to endToken once the token has +// been consumed using readRune. +func (sc *scanner) startToken(val *tokenValue) { + sc.token = sc.rest + val.raw = "" + val.pos = sc.pos +} + +// endToken marks the end of an input token. +// It records the actual token string in val.raw if the caller +// has not done that already. +func (sc *scanner) endToken(val *tokenValue) { + if val.raw == "" { + val.raw = string(sc.token[:len(sc.token)-len(sc.rest)]) + } +} + +// nextToken is called by the parser to obtain the next input token. +// It returns the token value and sets val to the data associated with +// the token. +// +// For all our input tokens, the associated data is val.pos (the +// position where the token begins), val.raw (the input string +// corresponding to the token). For string and int tokens, the string +// and int fields additionally contain the token's interpreted value. +func (sc *scanner) nextToken(val *tokenValue) Token { + + // The following distribution of tokens guides case ordering: + // + // COMMA 27 % + // STRING 23 % + // IDENT 15 % + // EQL 11 % + // LBRACK 5.5 % + // RBRACK 5.5 % + // NEWLINE 3 % + // LPAREN 2.9 % + // RPAREN 2.9 % + // INT 2 % + // others < 1 % + // + // Although NEWLINE tokens are infrequent, and lineStart is + // usually (~97%) false on entry, skipped newlines account for + // about 50% of all iterations of the 'start' loop. + +start: + var c rune + + // Deal with leading spaces and indentation. + blank := false + savedLineStart := sc.lineStart + if sc.lineStart { + sc.lineStart = false + col := 0 + for { + c = sc.peekRune() + if c == ' ' { + col++ + sc.readRune() + } else if c == '\t' { + const tab = 8 + col += int(tab - (sc.pos.Col-1)%tab) + sc.readRune() + } else { + break + } + } + + // The third clause matches EOF. + if c == '#' || c == '\n' || c == 0 { + blank = true + } + + // Compute indentation level for non-blank lines not + // inside an expression. This is not the common case. + if !blank && sc.depth == 0 { + cur := sc.indentstk[len(sc.indentstk)-1] + if col > cur { + // indent + sc.dents++ + sc.indentstk = append(sc.indentstk, col) + } else if col < cur { + // outdent(s) + for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] { + sc.dents-- + sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop + } + if col != sc.indentstk[len(sc.indentstk)-1] { + sc.error(sc.pos, "unindent does not match any outer indentation level") + } + } + } + } + + // Return saved indentation tokens. + if sc.dents != 0 { + sc.startToken(val) + sc.endToken(val) + if sc.dents < 0 { + sc.dents++ + return OUTDENT + } else { + sc.dents-- + return INDENT + } + } + + // start of line proper + c = sc.peekRune() + + // Skip spaces. + for c == ' ' || c == '\t' { + sc.readRune() + c = sc.peekRune() + } + + // comment + if c == '#' { + if sc.keepComments { + sc.startToken(val) + } + // Consume up to newline (included). + for c != 0 && c != '\n' { + sc.readRune() + c = sc.peekRune() + } + if sc.keepComments { + sc.endToken(val) + if blank { + sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw}) + } else { + sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw}) + } + } + } + + // newline + if c == '\n' { + sc.lineStart = true + + // Ignore newlines within expressions (common case). + if sc.depth > 0 { + sc.readRune() + goto start + } + + // Ignore blank lines, except in the REPL, + // where they emit OUTDENTs and NEWLINE. + if blank { + if sc.readline == nil { + sc.readRune() + goto start + } else if len(sc.indentstk) > 1 { + sc.dents = 1 - len(sc.indentstk) + sc.indentstk = sc.indentstk[:1] + goto start + } + } + + // At top-level (not in an expression). + sc.startToken(val) + sc.readRune() + val.raw = "\n" + return NEWLINE + } + + // end of file + if c == 0 { + // Emit OUTDENTs for unfinished indentation, + // preceded by a NEWLINE if we haven't just emitted one. + if len(sc.indentstk) > 1 { + if savedLineStart { + sc.dents = 1 - len(sc.indentstk) + sc.indentstk = sc.indentstk[:1] + goto start + } else { + sc.lineStart = true + sc.startToken(val) + val.raw = "\n" + return NEWLINE + } + } + + sc.startToken(val) + sc.endToken(val) + return EOF + } + + // line continuation + if c == '\\' { + sc.readRune() + if sc.peekRune() != '\n' { + sc.errorf(sc.pos, "stray backslash in program") + } + sc.readRune() + goto start + } + + // start of the next token + sc.startToken(val) + + // comma (common case) + if c == ',' { + sc.readRune() + sc.endToken(val) + return COMMA + } + + // string literal + if c == '"' || c == '\'' { + return sc.scanString(val, c) + } + + // identifier or keyword + if isIdentStart(c) { + if (c == 'r' || c == 'b') && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') { + // r"..." + // b"..." + sc.readRune() + c = sc.peekRune() + return sc.scanString(val, c) + } else if c == 'r' && len(sc.rest) > 2 && sc.rest[1] == 'b' && (sc.rest[2] == '"' || sc.rest[2] == '\'') { + // rb"..." + sc.readRune() + sc.readRune() + c = sc.peekRune() + return sc.scanString(val, c) + } + + for isIdent(c) { + sc.readRune() + c = sc.peekRune() + } + sc.endToken(val) + if k, ok := keywordToken[val.raw]; ok { + return k + } + + return IDENT + } + + // brackets + switch c { + case '[', '(', '{': + sc.depth++ + sc.readRune() + sc.endToken(val) + switch c { + case '[': + return LBRACK + case '(': + return LPAREN + case '{': + return LBRACE + } + panic("unreachable") + + case ']', ')', '}': + if sc.depth == 0 { + sc.errorf(sc.pos, "unexpected %q", c) + } else { + sc.depth-- + } + sc.readRune() + sc.endToken(val) + switch c { + case ']': + return RBRACK + case ')': + return RPAREN + case '}': + return RBRACE + } + panic("unreachable") + } + + // int or float literal, or period + if isdigit(c) || c == '.' { + return sc.scanNumber(val, c) + } + + // other punctuation + defer sc.endToken(val) + switch c { + case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^': // possibly followed by '=' + start := sc.pos + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + switch c { + case '<': + return LE + case '>': + return GE + case '=': + return EQL + case '!': + return NEQ + case '+': + return PLUS_EQ + case '-': + return MINUS_EQ + case '/': + return SLASH_EQ + case '%': + return PERCENT_EQ + case '&': + return AMP_EQ + case '|': + return PIPE_EQ + case '^': + return CIRCUMFLEX_EQ + } + } + switch c { + case '=': + return EQ + case '<': + if sc.peekRune() == '<' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return LTLT_EQ + } else { + return LTLT + } + } + return LT + case '>': + if sc.peekRune() == '>' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return GTGT_EQ + } else { + return GTGT + } + } + return GT + case '!': + sc.error(start, "unexpected input character '!'") + case '+': + return PLUS + case '-': + return MINUS + case '/': + if sc.peekRune() == '/' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return SLASHSLASH_EQ + } else { + return SLASHSLASH + } + } + return SLASH + case '%': + return PERCENT + case '&': + return AMP + case '|': + return PIPE + case '^': + return CIRCUMFLEX + } + panic("unreachable") + + case ':', ';', '~': // single-char tokens (except comma) + sc.readRune() + switch c { + case ':': + return COLON + case ';': + return SEMI + case '~': + return TILDE + } + panic("unreachable") + + case '*': // possibly followed by '*' or '=' + sc.readRune() + switch sc.peekRune() { + case '*': + sc.readRune() + return STARSTAR + case '=': + sc.readRune() + return STAR_EQ + } + return STAR + } + + sc.errorf(sc.pos, "unexpected input character %#q", c) + panic("unreachable") +} + +func (sc *scanner) scanString(val *tokenValue, quote rune) Token { + start := sc.pos + triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote) + sc.readRune() + + // String literals may contain escaped or unescaped newlines, + // causing them to span multiple lines (gulps) of REPL input; + // they are the only such token. Thus we cannot call endToken, + // as it assumes sc.rest is unchanged since startToken. + // Instead, buffer the token here. + // TODO(adonovan): opt: buffer only if we encounter a newline. + raw := new(strings.Builder) + + // Copy the prefix, e.g. r' or " (see startToken). + raw.Write(sc.token[:len(sc.token)-len(sc.rest)]) + + if !triple { + // single-quoted string literal + for { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c := sc.readRune() + raw.WriteRune(c) + if c == quote { + break + } + if c == '\n' { + sc.error(val.pos, "unexpected newline in string") + } + if c == '\\' { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c = sc.readRune() + raw.WriteRune(c) + } + } + } else { + // triple-quoted string literal + sc.readRune() + raw.WriteRune(quote) + sc.readRune() + raw.WriteRune(quote) + + quoteCount := 0 + for { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c := sc.readRune() + raw.WriteRune(c) + if c == quote { + quoteCount++ + if quoteCount == 3 { + break + } + } else { + quoteCount = 0 + } + if c == '\\' { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c = sc.readRune() + raw.WriteRune(c) + } + } + } + val.raw = raw.String() + + s, _, isByte, err := unquote(val.raw) + if err != nil { + sc.error(start, err.Error()) + } + val.string = s + if isByte { + return BYTES + } else { + return STRING + } +} + +func (sc *scanner) scanNumber(val *tokenValue, c rune) Token { + // https://github.com/google/starlark-go/blob/master/doc/spec.md#lexical-elements + // + // Python features not supported: + // - integer literals of >64 bits of precision + // - 123L or 123l long suffix + // - traditional octal: 0755 + // https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals + + start := sc.pos + fraction, exponent := false, false + + if c == '.' { + // dot or start of fraction + sc.readRune() + c = sc.peekRune() + if !isdigit(c) { + sc.endToken(val) + return DOT + } + fraction = true + } else if c == '0' { + // hex, octal, binary or float + sc.readRune() + c = sc.peekRune() + + if c == '.' { + fraction = true + } else if c == 'x' || c == 'X' { + // hex + sc.readRune() + c = sc.peekRune() + if !isxdigit(c) { + sc.error(start, "invalid hex literal") + } + for isxdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else if c == 'o' || c == 'O' { + // octal + sc.readRune() + c = sc.peekRune() + if !isodigit(c) { + sc.error(sc.pos, "invalid octal literal") + } + for isodigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else if c == 'b' || c == 'B' { + // binary + sc.readRune() + c = sc.peekRune() + if !isbdigit(c) { + sc.error(sc.pos, "invalid binary literal") + } + for isbdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else { + // float (or obsolete octal "0755") + allzeros, octal := true, true + for isdigit(c) { + if c != '0' { + allzeros = false + } + if c > '7' { + octal = false + } + sc.readRune() + c = sc.peekRune() + } + if c == '.' { + fraction = true + } else if c == 'e' || c == 'E' { + exponent = true + } else if octal && !allzeros { + sc.endToken(val) + sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:]) + } + } + } else { + // decimal + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + + if c == '.' { + fraction = true + } else if c == 'e' || c == 'E' { + exponent = true + } + } + + if fraction { + sc.readRune() // consume '.' + c = sc.peekRune() + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + + if c == 'e' || c == 'E' { + exponent = true + } + } + + if exponent { + sc.readRune() // consume [eE] + c = sc.peekRune() + if c == '+' || c == '-' { + sc.readRune() + c = sc.peekRune() + if !isdigit(c) { + sc.error(sc.pos, "invalid float literal") + } + } + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } + + sc.endToken(val) + if fraction || exponent { + var err error + val.float, err = strconv.ParseFloat(val.raw, 64) + if err != nil { + sc.error(sc.pos, "invalid float literal") + } + return FLOAT + } else { + var err error + s := val.raw + val.bigInt = nil + if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') { + val.int, err = strconv.ParseInt(s[2:], 8, 64) + } else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') { + val.int, err = strconv.ParseInt(s[2:], 2, 64) + } else { + val.int, err = strconv.ParseInt(s, 0, 64) + if err != nil { + num := new(big.Int) + var ok bool + val.bigInt, ok = num.SetString(s, 0) + if ok { + err = nil + } + } + } + if err != nil { + sc.error(start, "invalid int literal") + } + return INT + } +} + +// isIdent reports whether c is an identifier rune. +func isIdent(c rune) bool { + return isdigit(c) || isIdentStart(c) +} + +func isIdentStart(c rune) bool { + return 'a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z' || + c == '_' || + unicode.IsLetter(c) +} + +func isdigit(c rune) bool { return '0' <= c && c <= '9' } +func isodigit(c rune) bool { return '0' <= c && c <= '7' } +func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' } +func isbdigit(c rune) bool { return '0' == c || c == '1' } + +// keywordToken records the special tokens for +// strings that should not be treated as ordinary identifiers. +var keywordToken = map[string]Token{ + "and": AND, + "break": BREAK, + "continue": CONTINUE, + "def": DEF, + "elif": ELIF, + "else": ELSE, + "for": FOR, + "if": IF, + "in": IN, + "lambda": LAMBDA, + "load": LOAD, + "not": NOT, + "or": OR, + "pass": PASS, + "return": RETURN, + "while": WHILE, + + // reserved words: + "as": ILLEGAL, + // "assert": ILLEGAL, // heavily used by our tests + "class": ILLEGAL, + "del": ILLEGAL, + "except": ILLEGAL, + "finally": ILLEGAL, + "from": ILLEGAL, + "global": ILLEGAL, + "import": ILLEGAL, + "is": ILLEGAL, + "nonlocal": ILLEGAL, + "raise": ILLEGAL, + "try": ILLEGAL, + "with": ILLEGAL, + "yield": ILLEGAL, +} |