diff options
| | | |
|---|---|---|
| author | alandonovan <adonovan@google.com> | 2020-03-26 17:56:36 -0400 |
| committer | GitHub <noreply@github.com> | 2020-03-26 17:56:36 -0400 |
| commit | e8819e80789406f2590edd4458cc385b5144a3e9 (patch) | |
| tree | 694610dd5876455536aeec9ee6656a463a59a181 | |
| parent | 16e44b11d94568b6de240a245c9f85c415e69bbc (diff) | |
| download | starlark-go-e8819e80789406f2590edd4458cc385b5144a3e9.tar.gz | |
syntax: fix scanString bug affecting REPL (#266)

Prior to this change, the REPL incorrectly scanned
regular and raw string literals containing newlines
because scanString falsely assumed that only
triple-quoted strings can span lines:
```
>>> 'a\
... b'
<stdin>:1:2: string literal has invalid quotes
>>> r'a
... b'
<stdin>:1:2: string literal has invalid quotes
```
Now it does the right thing:
```
>>> 'a\
... b'
"ab"
>>> r'a\
... b'
"a\\\nb"
```
-rw-r--r-- | syntax/scan.go | 31 | ||||
-rw-r--r-- | syntax/scan_test.go | 1 |
2 files changed, 20 insertions, 12 deletions
```diff
diff --git a/syntax/scan.go b/syntax/scan.go
index 51cf885..53d9f5c 100644
--- a/syntax/scan.go
+++ b/syntax/scan.go
@@ -805,13 +805,26 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
 	start := sc.pos
 	triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
 	sc.readRune()
+
+	// String literals may contain escaped or unescaped newlines,
+	// causing them to span multiple lines (gulps) of REPL input;
+	// they are the only such token. Thus we cannot call endToken,
+	// as it assumes sc.rest is unchanged since startToken.
+	// Instead, buffer the token here.
+	// TODO(adonovan): opt: buffer only if we encounter a newline.
+	raw := new(strings.Builder)
+
+	// Copy the prefix, e.g. r' or " (see startToken).
+	raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
+
 	if !triple {
-		// Precondition: startToken was already called.
+		// single-quoted string literal
 		for {
 			if sc.eof() {
 				sc.error(val.pos, "unexpected EOF in string")
 			}
 			c := sc.readRune()
+			raw.WriteRune(c)
 			if c == quote {
 				break
 			}
@@ -822,22 +835,16 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
 				if sc.eof() {
 					sc.error(val.pos, "unexpected EOF in string")
 				}
-				sc.readRune()
+				c = sc.readRune()
+				raw.WriteRune(c)
 			}
 		}
-		sc.endToken(val)
 	} else {
 		// triple-quoted string literal
 		sc.readRune()
+		raw.WriteRune(quote)
 		sc.readRune()
-
-		// A triple-quoted string literal may span multiple
-		// gulps of REPL input; it is the only such token.
-		// Thus we must avoid {start,end}Token.
-		raw := new(strings.Builder)
-
-		// Copy the prefix, e.g. r''' or """ (see startToken).
-		raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
+		raw.WriteRune(quote)
 
 		quoteCount := 0
 		for {
@@ -862,8 +869,8 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
 				raw.WriteRune(c)
 			}
 		}
-		val.raw = raw.String()
 	}
+	val.raw = raw.String()
 
 	s, _, err := unquote(val.raw)
 	if err != nil {
diff --git a/syntax/scan_test.go b/syntax/scan_test.go
index a63ec81..fecf26f 100644
--- a/syntax/scan_test.go
+++ b/syntax/scan_test.go
@@ -119,6 +119,7 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
 2`, `x = 1 + 2 EOF`},
 		{`x = 'a\nb'`, `x = "a\nb" EOF`},
 		{`x = r'a\nb'`, `x = "a\\nb" EOF`},
+		{"x = 'a\\\nb'", `x = "ab" EOF`},
 		{`x = '\''`, `x = "'" EOF`},
 		{`x = "\""`, `x = "\"" EOF`},
 		{`x = r'\''`, `x = "\\'" EOF`},
```