aboutsummaryrefslogtreecommitdiff
path: root/syntax/scan_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'syntax/scan_test.go')
-rw-r--r-- syntax/scan_test.go 310
1 files changed, 310 insertions, 0 deletions
diff --git a/syntax/scan_test.go b/syntax/scan_test.go
new file mode 100644
index 0000000..9582bd7
--- /dev/null
+++ b/syntax/scan_test.go
@@ -0,0 +1,310 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import (
+ "bytes"
+ "fmt"
+ "go/build"
+ "io/ioutil"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// scan tokenizes src and renders the resulting token stream as a single
+// string of space-separated tokens that ends with "EOF".
+//
+// The scanner is created for the file name "foo.star". Identifiers are
+// rendered as their raw text, integers in decimal (big or small), floats
+// in %e notation, string and bytes literals through Quote, and every
+// other token through its String method.
+//
+// If the scanner cannot be created, the error from newScanner is returned.
+// NOTE(review): sc.recover presumably turns a panic raised while scanning
+// into the named result err; confirm against the scanner implementation.
+func scan(src interface{}) (tokens string, err error) {
+	sc, err := newScanner("foo.star", src, false)
+	if err != nil {
+		return "", err
+	}
+
+	defer sc.recover(&err)
+
+	var out bytes.Buffer
+	var val tokenValue
+	for {
+		tok := sc.nextToken(&val)
+
+		// Work out the textual form of this token first.
+		var text string
+		switch tok {
+		case EOF:
+			text = "EOF"
+		case IDENT:
+			text = val.raw
+		case INT:
+			if val.bigInt == nil {
+				text = fmt.Sprintf("%d", val.int)
+			} else {
+				text = fmt.Sprintf("%d", val.bigInt)
+			}
+		case FLOAT:
+			text = fmt.Sprintf("%e", val.float)
+		case STRING, BYTES:
+			text = Quote(val.string, tok == BYTES)
+		default:
+			text = tok.String()
+		}
+
+		// Separate tokens with a single space, but never lead with one.
+		if out.Len() > 0 {
+			out.WriteByte(' ')
+		}
+		out.WriteString(text)
+
+		// EOF is rendered like any other token and then ends the scan.
+		if tok == EOF {
+			return out.String(), nil
+		}
+	}
+}
+
+// TestScanner feeds each input of the table below to scan and checks the
+// result against the expected text. For inputs that scan successfully the
+// result is the space-separated token stream; for inputs that fail it is
+// the error message. The comparison is a prefix match, so an expectation
+// may be a truncated form of the full output.
+func TestScanner(t *testing.T) {
+ for _, test := range []struct {
+ input, want string
+ }{
+ {``, "EOF"},
+ {`123`, "123 EOF"},
+ {`x.y`, "x . y EOF"},
+ {`chocolate.éclair`, `chocolate . éclair EOF`},
+ {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
+ {`print(x)`, "print ( x ) EOF"},
+ {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
+ {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
+ {`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
+ {`# hello
+print(x)`, "print ( x ) EOF"},
+ {`# hello
+print(1)
+cc_binary(name="foo")
+def f(x):
+ return x+1
+print(1)
+`,
+ `print ( 1 ) newline ` +
+ `cc_binary ( name = "foo" ) newline ` +
+ `def f ( x ) : newline ` +
+ `indent return x + 1 newline ` +
+ `outdent print ( 1 ) newline ` +
+ `EOF`},
+ // EOF should act like an implicit newline.
+ {`def f(): pass`,
+ "def f ( ) : pass EOF"},
+ {`def f():
+ pass`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass
+# oops`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass \
+`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass
+`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`pass
+
+
+pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
+ {`def f():
+ pass
+ `, "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass
+ ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
+ {"pass", "pass EOF"},
+ {"pass\n", "pass newline EOF"},
+ {"pass\n ", "pass newline EOF"},
+ {"pass\n \n", "pass newline EOF"},
+ {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"},
+ {`x = 1 + \
+2`, `x = 1 + 2 EOF`},
+ {`x = 'a\nb'`, `x = "a\nb" EOF`},
+ {`x = r'a\nb'`, `x = "a\\nb" EOF`},
+ {"x = 'a\\\nb'", `x = "ab" EOF`},
+ {`x = '\''`, `x = "'" EOF`},
+ {`x = "\""`, `x = "\"" EOF`},
+ {`x = r'\''`, `x = "\\'" EOF`},
+ {`x = '''\''''`, `x = "'" EOF`},
+ {`x = r'''\''''`, `x = "\\'" EOF`},
+ {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
+ {"x = '''a\nb'''", `x = "a\nb" EOF`},
+ {"x = '''a\rb'''", `x = "a\nb" EOF`},
+ {"x = '''a\r\nb'''", `x = "a\nb" EOF`},
+ {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
+ {"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
+ {"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
+ {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
+ {"a\rb", `a newline b EOF`},
+ {"a\nb", `a newline b EOF`},
+ {"a\r\nb", `a newline b EOF`},
+ {"a\n\nb", `a newline b EOF`},
+ // numbers
+ {"0", `0 EOF`},
+ {"00", `0 EOF`},
+ {"0.", `0.000000e+00 EOF`},
+ {"0.e1", `0.000000e+00 EOF`},
+ {".0", `0.000000e+00 EOF`},
+ {"0.0", `0.000000e+00 EOF`},
+ {".e1", `. e1 EOF`},
+ {"1", `1 EOF`},
+ {"1.", `1.000000e+00 EOF`},
+ {".1", `1.000000e-01 EOF`},
+ {".1e1", `1.000000e+00 EOF`},
+ {".1e+1", `1.000000e+00 EOF`},
+ {".1e-1", `1.000000e-02 EOF`},
+ {"1e1", `1.000000e+01 EOF`},
+ {"1e+1", `1.000000e+01 EOF`},
+ {"1e-1", `1.000000e-01 EOF`},
+ {"123", `123 EOF`},
+ {"123e45", `1.230000e+47 EOF`},
+ {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
+ {"12345678901234567890", `12345678901234567890 EOF`},
+ // hex
+ {"0xA", `10 EOF`},
+ {"0xAAG", `170 G EOF`},
+ {"0xG", `foo.star:1:1: invalid hex literal`},
+ {"0XA", `10 EOF`},
+ {"0XG", `foo.star:1:1: invalid hex literal`},
+ {"0xA.", `10 . EOF`},
+ {"0xA.e1", `10 . e1 EOF`},
+ {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
+ // binary
+ {"0b1010", `10 EOF`},
+ {"0B111101", `61 EOF`},
+ {"0b3", `foo.star:1:3: invalid binary literal`},
+ {"0b1010201", `10 201 EOF`},
+ {"0b1010.01", `10 1.000000e-02 EOF`},
+ {"0b0000", `0 EOF`},
+ // octal
+ {"0o123", `83 EOF`},
+ {"0o12834", `10 834 EOF`},
+ {"0o12934", `10 934 EOF`},
+ {"0o12934.", `10 9.340000e+02 EOF`},
+ {"0o12934.1", `10 9.341000e+02 EOF`},
+ {"0o12934e1", `10 9.340000e+03 EOF`},
+ {"0o123.", `83 . EOF`},
+ {"0o123.1", `83 1.000000e-01 EOF`},
+ {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
+ {"012834", `foo.star:1:1: invalid int literal`},
+ {"012934", `foo.star:1:1: invalid int literal`},
+ {"i = 012934", `foo.star:1:5: invalid int literal`},
+ // octal escapes in string literals
+ {`"\037"`, `"\x1f" EOF`},
+ {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
+ {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8'
+ {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
+ // hex escapes
+ {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
+ {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
+ {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
+ {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
+ {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
+ {`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
+ {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
+ // Unicode escapes
+ // \uXXXX
+ {`"\u0400"`, `"Ѐ" EOF`},
+ {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
+ {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+ {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
+ {`"\u4E16"`, `"世" EOF`},
+ {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+ // \UXXXXXXXX
+ {`"\U00000400"`, `"Ѐ" EOF`},
+ {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
+ {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+ {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
+ {`"\U0010FFFF"`, `"\U0010ffff" EOF`},
+ {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
+ {`"\U0001F63F"`, `"😿" EOF`},
+ {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+
+ // backslash escapes
+ // As in Go, a backslash must escape something.
+ // (Python started issuing a deprecation warning in 3.6.)
+ {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
+ {`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
+ {`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
+ {`"\""`, `"\"" EOF`},
+ {`"\'"`, `"'" EOF`},
+ {`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
+ {`'\''`, `"'" EOF`},
+ {`'\"'`, `"\"" EOF`},
+ {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
+ {`"""\""""`, `"\"" EOF`},
+ {`"""\'"""`, `"'" EOF`},
+ {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
+ {`'''\''''`, `"'" EOF`},
+ {`'''\"'''`, `"\"" EOF`},
+ {`r"\w"`, `"\\w" EOF`},
+ {`r"\""`, `"\\\"" EOF`},
+ {`r"\'"`, `"\\'" EOF`},
+ {`r'\w'`, `"\\w" EOF`},
+ {`r'\''`, `"\\'" EOF`},
+ {`r'\"'`, `"\\\"" EOF`},
+ {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
+ {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
+ // bytes literals (where they differ from text strings)
+ {`b"AЀ世😿"`, `b"AЀ世😿`}, // 1-4 byte encodings, literal
+ {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`}, // same, as escapes
+ {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
+ {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
+ {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
+ // floats starting with octal digits
+ {"012934.", `1.293400e+04 EOF`},
+ {"012934.1", `1.293410e+04 EOF`},
+ {"012934e1", `1.293400e+05 EOF`},
+ {"0123.", `1.230000e+02 EOF`},
+ {"0123.1", `1.231000e+02 EOF`},
+ // github.com/google/skylark/issues/16
+ {"x ! 0", "foo.star:1:3: unexpected input character '!'"},
+ // github.com/google/starlark-go/issues/80
+ {"([{<>}])", "( [ { < > } ] ) EOF"},
+ {"f();", "f ( ) ; EOF"},
+ // github.com/google/starlark-go/issues/104
+ {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
+ {`while cond: pass`, "while cond : pass EOF"},
+ // github.com/google/starlark-go/issues/107
+ {"~= ~= 5", "~ = ~ = 5 EOF"},
+ {"0in", "0 in EOF"},
+ {"0or", "foo.star:1:3: invalid octal literal"},
+ {"6in", "6 in EOF"},
+ {"6or", "6 or EOF"},
+ } {
+ got, err := scan(test.input)
+ if err != nil {
+ // On failure, compare against the error text instead of the
+ // token stream. The type assertion panics if err is not an Error.
+ got = err.(Error).Error()
+ }
+ // Prefix match allows us to truncate errors in expectations.
+ // Success cases all end in EOF.
+ if !strings.HasPrefix(got, test.want) {
+ t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
+ }
+ }
+}
+
+// dataFile returns the path of filename inside the package directory
+// pkgdir, rooted at src/go.starlark.net under the default build GOPATH.
+//
+// It mirrors starlarktest.DataFile; the copy exists here to avoid a
+// dependency cycle.
+var dataFile = func(pkgdir, filename string) string {
+	root := filepath.Join(build.Default.GOPATH, "src/go.starlark.net")
+	return filepath.Join(root, pkgdir, filename)
+}
+
+// BenchmarkScan measures how long it takes to scan every token of
+// testdata/scan.star. A fresh scanner is created over the same in-memory
+// data on each iteration and driven until it reports EOF.
+func BenchmarkScan(b *testing.B) {
+	path := dataFile("syntax", "testdata/scan.star")
+
+	// Keep the file read out of the measured time.
+	b.StopTimer()
+	src, err := ioutil.ReadFile(path)
+	if err != nil {
+		b.Fatal(err)
+	}
+	b.StartTimer()
+
+	for n := 0; n < b.N; n++ {
+		sc, err := newScanner(path, src, false)
+		if err != nil {
+			b.Fatal(err)
+		}
+		var val tokenValue
+		// Consume tokens until the scanner reaches end of input.
+		for {
+			if tok := sc.nextToken(&val); tok == EOF {
+				break
+			}
+		}
+	}
+}