Fix a bug with javadoc comments followed by unicode escapes

The lexer was advancing to the next character and then reading the string contents of the javadoc comment. It assumed that the next token corresponded to exactly one character in the input stream, which isn't true of unicode escapes.

PiperOrigin-RevId: 499989979
author: Liam Miller-Cushon <cushon@google.com> 2023-01-05 13:22:08 -0800
committer: Javac Team <javac-team+copybara@google.com> 2023-01-05 20:29:36 -0800
commit: dd5e33a173263942cddfa96514430c89f01b6b3a (patch)
tree: ec7cef8edcce0056f41b3fc7541e0a83689c7678
parent: 346639cf34d17015d293f5c1af04d5700b2dc908 (diff)
download: turbine-dd5e33a173263942cddfa96514430c89f01b6b3a.tar.gz
2 files changed, 12 insertions, 3 deletions
diff --git a/java/com/google/turbine/parse/StreamLexer.java b/java/com/google/turbine/parse/StreamLexer.java
index 3d46b90..ed79dd0 100644
--- a/java/com/google/turbine/parse/StreamLexer.java
+++ b/java/com/google/turbine/parse/StreamLexer.java
@@ -75,8 +75,8 @@ public class StreamLexer implements Lexer {
     if (result == null) {
       return null;
     }
-    verify(result.endsWith("*/"), result);
-    return result.substring(0, result.length() - "*/".length());
+    verify(result.endsWith("*"), result);
+    return result.substring(0, result.length() - "*".length());
   }
 
   @Override
@@ -153,16 +153,18 @@ public class StreamLexer implements Lexer {
                       sawStar = true;
                       break;
                     case '/':
-                      eat();
                       if (sawStar) {
                         if (isJavadoc) {
                           // Save the comment, excluding the leading `/**` and including
                           // the trailing `/*`. The comment is trimmed and normalized later.
                           javadoc = stringValue();
+                          verify(javadoc.endsWith("*"), javadoc);
                         }
+                        eat();
                         continue OUTER;
                       }
                       sawStar = false;
+                      eat();
                       break;
                     case ASCII_SUB:
                       if (reader.done()) {
diff --git a/javatests/com/google/turbine/parse/LexerTest.java b/javatests/com/google/turbine/parse/LexerTest.java
index bf0b374..6a6fe1c 100644
--- a/javatests/com/google/turbine/parse/LexerTest.java
+++ b/javatests/com/google/turbine/parse/LexerTest.java
@@ -339,6 +339,11 @@ public class LexerTest {
     lexerComparisonTest("import pkg\uD800\uDC00.test;");
   }
 
+  @Test
+  public void javadocUnicodeEscape() {
+    lexerComparisonTest("class {/***/\\u007D;");
+  }
+
   private void lexerComparisonTest(String s) {
     assertThat(lex(s)).containsExactlyElementsIn(JavacLexer.javacLex(s));
   }
@@ -349,6 +354,8 @@ public class LexerTest {
     Token token;
     do {
       token = lexer.next();
+      // Just check that javadoc handling doesn't crash
+      String unused = lexer.javadoc();
       String tokenString;
       switch (token) {
         case IDENT:
author	Liam Miller-Cushon <cushon@google.com>	2023-01-05 13:22:08 -0800
committer	Javac Team <javac-team+copybara@google.com>	2023-01-05 20:29:36 -0800
commit	dd5e33a173263942cddfa96514430c89f01b6b3a (patch)
tree	ec7cef8edcce0056f41b3fc7541e0a83689c7678
parent	346639cf34d17015d293f5c1af04d5700b2dc908 (diff)
download	turbine-dd5e33a173263942cddfa96514430c89f01b6b3a.tar.gz