diff options
Diffstat (limited to 'syntax/scan_test.go')
-rw-r--r-- | syntax/scan_test.go | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/syntax/scan_test.go b/syntax/scan_test.go new file mode 100644 index 0000000..9582bd7 --- /dev/null +++ b/syntax/scan_test.go @@ -0,0 +1,310 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "bytes" + "fmt" + "go/build" + "io/ioutil" + "path/filepath" + "strings" + "testing" +) + +func scan(src interface{}) (tokens string, err error) { + sc, err := newScanner("foo.star", src, false) + if err != nil { + return "", err + } + + defer sc.recover(&err) + + var buf bytes.Buffer + var val tokenValue + for { + tok := sc.nextToken(&val) + + if buf.Len() > 0 { + buf.WriteByte(' ') + } + switch tok { + case EOF: + buf.WriteString("EOF") + case IDENT: + buf.WriteString(val.raw) + case INT: + if val.bigInt != nil { + fmt.Fprintf(&buf, "%d", val.bigInt) + } else { + fmt.Fprintf(&buf, "%d", val.int) + } + case FLOAT: + fmt.Fprintf(&buf, "%e", val.float) + case STRING, BYTES: + buf.WriteString(Quote(val.string, tok == BYTES)) + default: + buf.WriteString(tok.String()) + } + if tok == EOF { + break + } + } + return buf.String(), nil +} + +func TestScanner(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {``, "EOF"}, + {`123`, "123 EOF"}, + {`x.y`, "x . y EOF"}, + {`chocolate.éclair`, `chocolate . éclair EOF`}, + {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, + {`print(x)`, "print ( x ) EOF"}, + {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, + {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token + {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, + {`# hello +print(x)`, "print ( x ) EOF"}, + {`# hello +print(1) +cc_binary(name="foo") +def f(x): + return x+1 +print(1) +`, + `print ( 1 ) newline ` + + `cc_binary ( name = "foo" ) newline ` + + `def f ( x ) : newline ` + + `indent return x + 1 newline ` + + `outdent print ( 1 ) newline ` + + `EOF`}, + // EOF should act line an implicit newline. + {`def f(): pass`, + "def f ( ) : pass EOF"}, + {`def f(): + pass`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass +# oops`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass \ +`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass +`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`pass + + +pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated + {`def f(): + pass + `, "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass + ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, + {"pass", "pass EOF"}, + {"pass\n", "pass newline EOF"}, + {"pass\n ", "pass newline EOF"}, + {"pass\n \n", "pass newline EOF"}, + {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, + {`x = 1 + \ +2`, `x = 1 + 2 EOF`}, + {`x = 'a\nb'`, `x = "a\nb" EOF`}, + {`x = r'a\nb'`, `x = "a\\nb" EOF`}, + {"x = 'a\\\nb'", `x = "ab" EOF`}, + {`x = '\''`, `x = "'" EOF`}, + {`x = "\""`, `x = "\"" EOF`}, + {`x = r'\''`, `x = "\\'" EOF`}, + {`x = '''\''''`, `x = "'" EOF`}, + {`x = r'''\''''`, `x = "\\'" EOF`}, + {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, + {"x = '''a\nb'''", `x = "a\nb" EOF`}, + {"x = '''a\rb'''", `x = "a\nb" EOF`}, + {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, + {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, + {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, + {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, + {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, + {"a\rb", `a newline b EOF`}, + {"a\nb", `a newline b EOF`}, + {"a\r\nb", `a newline b EOF`}, + {"a\n\nb", `a newline b EOF`}, + // numbers + {"0", `0 EOF`}, + {"00", `0 EOF`}, + {"0.", `0.000000e+00 EOF`}, + {"0.e1", `0.000000e+00 EOF`}, + {".0", `0.000000e+00 EOF`}, + {"0.0", `0.000000e+00 EOF`}, + {".e1", `. e1 EOF`}, + {"1", `1 EOF`}, + {"1.", `1.000000e+00 EOF`}, + {".1", `1.000000e-01 EOF`}, + {".1e1", `1.000000e+00 EOF`}, + {".1e+1", `1.000000e+00 EOF`}, + {".1e-1", `1.000000e-02 EOF`}, + {"1e1", `1.000000e+01 EOF`}, + {"1e+1", `1.000000e+01 EOF`}, + {"1e-1", `1.000000e-01 EOF`}, + {"123", `123 EOF`}, + {"123e45", `1.230000e+47 EOF`}, + {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, + {"12345678901234567890", `12345678901234567890 EOF`}, + // hex + {"0xA", `10 EOF`}, + {"0xAAG", `170 G EOF`}, + {"0xG", `foo.star:1:1: invalid hex literal`}, + {"0XA", `10 EOF`}, + {"0XG", `foo.star:1:1: invalid hex literal`}, + {"0xA.", `10 . EOF`}, + {"0xA.e1", `10 . e1 EOF`}, + {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, + // binary + {"0b1010", `10 EOF`}, + {"0B111101", `61 EOF`}, + {"0b3", `foo.star:1:3: invalid binary literal`}, + {"0b1010201", `10 201 EOF`}, + {"0b1010.01", `10 1.000000e-02 EOF`}, + {"0b0000", `0 EOF`}, + // octal + {"0o123", `83 EOF`}, + {"0o12834", `10 834 EOF`}, + {"0o12934", `10 934 EOF`}, + {"0o12934.", `10 9.340000e+02 EOF`}, + {"0o12934.1", `10 9.341000e+02 EOF`}, + {"0o12934e1", `10 9.340000e+03 EOF`}, + {"0o123.", `83 . EOF`}, + {"0o123.1", `83 1.000000e-01 EOF`}, + {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`}, + {"012834", `foo.star:1:1: invalid int literal`}, + {"012934", `foo.star:1:1: invalid int literal`}, + {"i = 012934", `foo.star:1:5: invalid int literal`}, + // octal escapes in string literals + {`"\037"`, `"\x1f" EOF`}, + {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`}, + {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' + {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3 + // hex escapes + {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable + {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`}, + {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`}, + {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`}, + {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`}, + {`"\x"`, `foo.star:1:1: truncated escape sequence \x`}, + {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`}, + // Unicode escapes + // \uXXXX + {`"\u0400"`, `"Ѐ" EOF`}, + {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`}, + {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' + {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`}, + {`"\u4E16"`, `"世" EOF`}, + {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate + // \UXXXXXXXX + {`"\U00000400"`, `"Ѐ" EOF`}, + {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`}, + {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' + {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`}, + {`"\U0010FFFF"`, `"\U0010ffff" EOF`}, + {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`}, + {`"\U0001F63F"`, `"😿" EOF`}, + {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate + + // backslash escapes + // As in Go, a backslash must escape something. + // (Python started issuing a deprecation warning in 3.6.) + {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`}, + {`"\+"`, `foo.star:1:1: invalid escape sequence \+`}, + {`"\w"`, `foo.star:1:1: invalid escape sequence \w`}, + {`"\""`, `"\"" EOF`}, + {`"\'"`, `"'" EOF`}, + {`'\w'`, `foo.star:1:1: invalid escape sequence \w`}, + {`'\''`, `"'" EOF`}, + {`'\"'`, `"\"" EOF`}, + {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`}, + {`"""\""""`, `"\"" EOF`}, + {`"""\'"""`, `"'" EOF`}, + {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`}, + {`'''\''''`, `"'" EOF`}, + {`'''\"'''`, `"\"" EOF`}, + {`r"\w"`, `"\\w" EOF`}, + {`r"\""`, `"\\\"" EOF`}, + {`r"\'"`, `"\\'" EOF`}, + {`r'\w'`, `"\\w" EOF`}, + {`r'\''`, `"\\'" EOF`}, + {`r'\"'`, `"\\\"" EOF`}, + {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`}, + {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`}, + // bytes literals (where they differ from text strings) + {`b"AЀ世😿"`, `b"AЀ世😿`}, // 1-4 byte encodings, literal + {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`}, // same, as escapes + {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII + {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`}, + {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string) + // floats starting with octal digits + {"012934.", `1.293400e+04 EOF`}, + {"012934.1", `1.293410e+04 EOF`}, + {"012934e1", `1.293400e+05 EOF`}, + {"0123.", `1.230000e+02 EOF`}, + {"0123.1", `1.231000e+02 EOF`}, + // github.com/google/skylark/issues/16 + {"x ! 0", "foo.star:1:3: unexpected input character '!'"}, + // github.com/google/starlark-go/issues/80 + {"([{<>}])", "( [ { < > } ] ) EOF"}, + {"f();", "f ( ) ; EOF"}, + // github.com/google/starlark-go/issues/104 + {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`}, + {`while cond: pass`, "while cond : pass EOF"}, + // github.com/google/starlark-go/issues/107 + {"~= ~= 5", "~ = ~ = 5 EOF"}, + {"0in", "0 in EOF"}, + {"0or", "foo.star:1:3: invalid octal literal"}, + {"6in", "6 in EOF"}, + {"6or", "6 or EOF"}, + } { + got, err := scan(test.input) + if err != nil { + got = err.(Error).Error() + } + // Prefix match allows us to truncate errors in expecations. + // Success cases all end in EOF. + if !strings.HasPrefix(got, test.want) { + t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) + } + } +} + +// dataFile is the same as starlarktest.DataFile. +// We make a copy to avoid a dependency cycle. +var dataFile = func(pkgdir, filename string) string { + return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename) +} + +func BenchmarkScan(b *testing.B) { + filename := dataFile("syntax", "testdata/scan.star") + b.StopTimer() + data, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + sc, err := newScanner(filename, data, false) + if err != nil { + b.Fatal(err) + } + var val tokenValue + for sc.nextToken(&val) != EOF { + } + } +} |