aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author alandonovan <adonovan@google.com> 2020-03-26 17:56:36 -0400
committer GitHub <noreply@github.com> 2020-03-26 17:56:36 -0400
commit e8819e80789406f2590edd4458cc385b5144a3e9 (patch)
tree 694610dd5876455536aeec9ee6656a463a59a181
parent 16e44b11d94568b6de240a245c9f85c415e69bbc (diff)
download starlark-go-e8819e80789406f2590edd4458cc385b5144a3e9.tar.gz
syntax: fix scanString bug affecting REPL (#266)
Prior to this change, the REPL incorrectly scanned regular and raw string
literals containing newlines because scanString falsely assumed that only
triple-quoted strings can span lines.

```
>>> 'a\
... b'
<stdin>:1:2: string literal has invalid quotes
>>> r'a
... b'
<stdin>:1:2: string literal has invalid quotes
```

Now it does the right thing:

```
>>> 'a\
... b'
"ab"
>>> r'a\
... b'
"a\\\nb"
```
-rw-r--r-- syntax/scan.go 31
-rw-r--r-- syntax/scan_test.go 1
2 files changed, 20 insertions, 12 deletions
diff --git a/syntax/scan.go b/syntax/scan.go
index 51cf885..53d9f5c 100644
--- a/syntax/scan.go
+++ b/syntax/scan.go
@@ -805,13 +805,26 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
start := sc.pos
triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
sc.readRune()
+
+ // String literals may contain escaped or unescaped newlines,
+ // causing them to span multiple lines (gulps) of REPL input;
+ // they are the only such token. Thus we cannot call endToken,
+ // as it assumes sc.rest is unchanged since startToken.
+ // Instead, buffer the token here.
+ // TODO(adonovan): opt: buffer only if we encounter a newline.
+ raw := new(strings.Builder)
+
+ // Copy the prefix, e.g. r' or " (see startToken).
+ raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
+
if !triple {
- // Precondition: startToken was already called.
+ // single-quoted string literal
for {
if sc.eof() {
sc.error(val.pos, "unexpected EOF in string")
}
c := sc.readRune()
+ raw.WriteRune(c)
if c == quote {
break
}
@@ -822,22 +835,16 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
if sc.eof() {
sc.error(val.pos, "unexpected EOF in string")
}
- sc.readRune()
+ c = sc.readRune()
+ raw.WriteRune(c)
}
}
- sc.endToken(val)
} else {
// triple-quoted string literal
sc.readRune()
+ raw.WriteRune(quote)
sc.readRune()
-
- // A triple-quoted string literal may span multiple
- // gulps of REPL input; it is the only such token.
- // Thus we must avoid {start,end}Token.
- raw := new(strings.Builder)
-
- // Copy the prefix, e.g. r''' or """ (see startToken).
- raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
+ raw.WriteRune(quote)
quoteCount := 0
for {
@@ -862,8 +869,8 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
raw.WriteRune(c)
}
}
- val.raw = raw.String()
}
+ val.raw = raw.String()
s, _, err := unquote(val.raw)
if err != nil {
diff --git a/syntax/scan_test.go b/syntax/scan_test.go
index a63ec81..fecf26f 100644
--- a/syntax/scan_test.go
+++ b/syntax/scan_test.go
@@ -119,6 +119,7 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
2`, `x = 1 + 2 EOF`},
{`x = 'a\nb'`, `x = "a\nb" EOF`},
{`x = r'a\nb'`, `x = "a\\nb" EOF`},
+ {"x = 'a\\\nb'", `x = "ab" EOF`},
{`x = '\''`, `x = "'" EOF`},
{`x = "\""`, `x = "\"" EOF`},
{`x = r'\''`, `x = "\\'" EOF`},