Import Cronet version 117.0.5938.0

Project import generated by Copybara. FolderOrigin-RevId: /tmp/copybara-origin/src Change-Id: Ib7683d0ed240e11ed9068152600c8092afba4571
author: Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com> 2023-08-14 17:15:38 +0000
committer: Mohannad Farrag <aymanm@google.com> 2023-08-14 17:22:36 +0000
commit: ec3a8e8db24bb3ce4b078106b358ca1c4389c14f (patch)
tree: 823f64849ad509483bfebb2252199a5fe79b8e43 /url
parent: d12afe756882b2521faa0b33cbd4813fcea04c22 (diff)
download: cronet-ec3a8e8db24bb3ce4b078106b358ca1c4389c14f.tar.gz
13 files changed, 134 insertions, 95 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn
index b5d6f6063..8f5fea125 100644
--- a/url/BUILD.gn
+++ b/url/BUILD.gn
@@ -7,6 +7,7 @@ import("//testing/libfuzzer/fuzzer_test.gni")
 import("//testing/test.gni")
 import("features.gni")
 
+import("//build/config/android/jni.gni")
 import("//build/config/cronet/config.gni")
 
 if (is_android || is_robolectric) {
@@ -103,10 +104,11 @@ component("url") {
 
 if (is_android || is_robolectric) {
   generate_jni("url_jni_headers") {
-    sources = [
-      "android/java/src/org/chromium/url/IDNStringUtil.java",
-      "android/java/src/org/chromium/url/Origin.java",
-    ]
+    sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
+  }
+
+  generate_jni("origin_jni_headers") {
+    sources = [ "android/java/src/org/chromium/url/Origin.java" ]
   }
 
   generate_jni("gurl_jni_headers") {
@@ -141,8 +143,8 @@ if (is_android || is_robolectric) {
 
     deps = [
       ":gurl_android",
+      ":origin_jni_headers",
       ":url",
-      ":url_jni_headers",
       "//base",
     ]
   }
@@ -157,6 +159,7 @@ if (is_android) {
 
 if (is_android && !is_cronet_build) {
   android_library("gurl_java") {
+    srcjar_deps = [ ":gurl_jni_headers" ]
     sources = [
       "android/java/src/org/chromium/url/GURL.java",
       "android/java/src/org/chromium/url/Parsed.java",
@@ -170,10 +173,10 @@ if (is_android && !is_cronet_build) {
       "//third_party/androidx:androidx_annotation_annotation_java",
       "//url/mojom:url_mojom_gurl_java",
     ]
-    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
   }
 
   android_library("origin_java") {
+    srcjar_deps = [ ":origin_jni_headers" ]
     sources = [ "android/java/src/org/chromium/url/Origin.java" ]
     deps = [
       ":gurl_java",
@@ -183,7 +186,6 @@ if (is_android && !is_cronet_build) {
       "//mojo/public/mojom/base:base_java",
       "//url/mojom:url_mojom_origin_java",
     ]
-    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
   }
 }
 
@@ -307,7 +309,8 @@ if (is_android && !is_cronet_build) {
 
   android_library("android_test_helper_java") {
     testonly = true
-    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+
+    srcjar_deps = [ ":j_test_jni_headers" ]
     sources = [
       "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
       "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
@@ -362,7 +365,6 @@ if (is_android && !is_cronet_build) {
       "//url/mojom:url_mojom_gurl_java",
       "//url/mojom:url_mojom_origin_java",
     ]
-    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
   }
 
   # See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we
@@ -389,10 +391,12 @@ if (is_android && !is_cronet_build) {
   }
 }
 
-if (is_robolectric) {
-  # Use this in robolectric_binary() targets if you just need GURL and //base
-  # functionality. Otherwise, define a custom shared_library().
-  shared_library("libgurl_robolectric") {
+# Use this in robolectric_binary() targets if you just need GURL and //base
+# functionality. Otherwise, define a custom shared_library().
+if (!is_cronet_build && target_os == "android") {
+  shared_library_with_jni("libgurl_robolectric") {
+    testonly = true
+    enable_target = is_robolectric
     sources = [ "android/robolectric_test_main.cc" ]
     deps = [
       "//base",
@@ -401,5 +405,7 @@ if (is_robolectric) {
 
     # Make jni.h available.
     configs += [ "//third_party/jdk" ]
+
+    java_targets = [ "//chrome/android:chrome_junit_tests" ]
   }
 }
diff --git a/url/android/java/src/org/chromium/url/GURL.java b/url/android/java/src/org/chromium/url/GURL.java
index 34bd92495..09698e4bb 100644
--- a/url/android/java/src/org/chromium/url/GURL.java
+++ b/url/android/java/src/org/chromium/url/GURL.java
@@ -8,7 +8,6 @@ import android.os.SystemClock;
 import android.text.TextUtils;
 
 import androidx.annotation.Nullable;
-import androidx.annotation.VisibleForTesting;
 
 import com.google.errorprone.annotations.DoNotMock;
 
@@ -21,7 +20,6 @@ import org.chromium.base.library_loader.LibraryLoader;
 import org.chromium.base.metrics.RecordHistogram;
 import org.chromium.base.task.PostTask;
 import org.chromium.base.task.TaskTraits;
-import org.chromium.build.annotations.MainDex;
 import org.chromium.url.mojom.Url;
 import org.chromium.url.mojom.UrlConstants;
 
@@ -39,7 +37,6 @@ import java.util.Random;
  * reconstruct a GURL in Java, allowing it to be much faster in the common case and easier to use.
  */
 @JNINamespace("url")
-@MainDex
 @DoNotMock("Create a real instance instead. For Robolectric, see JUnitTestGURLs.java")
 public class GURL {
     private static final String TAG = "GURL";
@@ -371,7 +368,6 @@ public class GURL {
     }
 
     /** Inits this GURL with the internal state of another GURL. */
-    @VisibleForTesting
     /* package */ void initForTesting(GURL gurl) {
         init(gurl.mSpec, gurl.mIsValid, gurl.mParsed);
     }
diff --git a/url/android/java/src/org/chromium/url/Parsed.java b/url/android/java/src/org/chromium/url/Parsed.java
index ca41cfb1f..75d12cb9a 100644
--- a/url/android/java/src/org/chromium/url/Parsed.java
+++ b/url/android/java/src/org/chromium/url/Parsed.java
@@ -7,12 +7,10 @@ package org.chromium.url;
 import org.chromium.base.annotations.CalledByNative;
 import org.chromium.base.annotations.JNINamespace;
 import org.chromium.base.annotations.NativeMethods;
-import org.chromium.build.annotations.MainDex;
 
 /**
  * A java wrapper for Parsed, GURL's internal parsed URI representation.
  */
-@MainDex
 @JNINamespace("url")
 /* package */ class Parsed {
     /* package */ final int mSchemeBegin;
diff --git a/url/android/origin_android.cc b/url/android/origin_android.cc
index a0dd271b5..a75a9a369 100644
--- a/url/android/origin_android.cc
+++ b/url/android/origin_android.cc
@@ -11,7 +11,7 @@
 #include "base/android/scoped_java_ref.h"
 #include "base/memory/ptr_util.h"
 #include "url/android/gurl_android.h"
-#include "url/url_jni_headers/Origin_jni.h"
+#include "url/origin_jni_headers/Origin_jni.h"
 
 namespace url {
 
diff --git a/url/gurl.cc b/url/gurl.cc
index 6930f73b6..9f2e5fad6 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -13,6 +13,7 @@
 
 #include "base/check_op.h"
 #include "base/no_destructor.h"
+#include "base/notreached.h"
 #include "base/strings/string_piece.h"
 #include "base/strings/string_util.h"
 #include "base/trace_event/base_tracing.h"
@@ -158,7 +159,9 @@ const std::string& GURL::spec() const {
   if (is_valid_ || spec_.empty())
     return spec_;
 
-  DCHECK(false) << "Trying to get the spec of an invalid URL!";
+  // TODO(crbug.com/851128): Make sure this no longer hits before making
+  // NOTREACHED_NORETURN();
+  NOTREACHED() << "Trying to get the spec of an invalid URL!";
   return base::EmptyString();
 }
 
diff --git a/url/third_party/mozilla/README.chromium b/url/third_party/mozilla/README.chromium
index ef396d3d1..9df199239 100644
--- a/url/third_party/mozilla/README.chromium
+++ b/url/third_party/mozilla/README.chromium
@@ -1,7 +1,11 @@
 Name: url_parse
-URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp
+Version: unknown
+URL: https://hg.mozilla.org/mozilla-central/file/tip/netwerk/base/nsURLParsers.cpp
 License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1
 License File: LICENSE.txt
+Shipped: yes
+Security Critical: yes
+CPEPrefix: unknown
 
 Description:
 
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index f6219209e..574a5ed0d 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -81,10 +81,11 @@ void DoAppendStringOfType(const CHAR* source,
   }
   for (; i < length; i++) {
     if (static_cast<UCHAR>(source[i]) >= 0x80) {
-      // ReadChar will fill the code point with kUnicodeReplacementCharacter
-      // when the input is invalid, which is what we want.
+      // ReadUTFCharLossy will fill the code point with
+      // kUnicodeReplacementCharacter when the input is invalid, which is what
+      // we want.
       base_icu::UChar32 code_point;
-      ReadUTFChar(source, &i, length, &code_point);
+      ReadUTFCharLossy(source, &i, length, &code_point);
       AppendUTF8EscapedValue(code_point, output);
     } else {
       // Just append the 7-bit character, possibly escaping it.
@@ -312,24 +313,22 @@ void AppendStringOfType(const char16_t* source,
   DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
 }
 
-bool ReadUTFChar(const char* str,
-                 size_t* begin,
-                 size_t length,
-                 base_icu::UChar32* code_point_out) {
-  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
-      !base::IsValidCharacter(*code_point_out)) {
+bool ReadUTFCharLossy(const char* str,
+                      size_t* begin,
+                      size_t length,
+                      base_icu::UChar32* code_point_out) {
+  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;
     return false;
   }
   return true;
 }
 
-bool ReadUTFChar(const char16_t* str,
-                 size_t* begin,
-                 size_t length,
-                 base_icu::UChar32* code_point_out) {
-  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
-      !base::IsValidCharacter(*code_point_out)) {
+bool ReadUTFCharLossy(const char16_t* str,
+                      size_t* begin,
+                      size_t length,
+                      base_icu::UChar32* code_point_out) {
+  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;
     return false;
   }
@@ -356,7 +355,7 @@ bool ConvertUTF16ToUTF8(const char16_t* input,
   bool success = true;
   for (size_t i = 0; i < input_len; i++) {
     base_icu::UChar32 code_point;
-    success &= ReadUTFChar(input, &i, input_len, &code_point);
+    success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
     AppendUTF8Value(code_point, output);
   }
   return success;
@@ -368,7 +367,7 @@ bool ConvertUTF8ToUTF16(const char* input,
   bool success = true;
   for (size_t i = 0; i < input_len; i++) {
     base_icu::UChar32 code_point;
-    success &= ReadUTFChar(input, &i, input_len, &code_point);
+    success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
     AppendUTF16Value(code_point, output);
   }
   return success;
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 13481f5fd..0f1400d56 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -144,19 +144,19 @@ extern const base_icu::UChar32 kUnicodeReplacementCharacter;
 
 // UTF-8 functions ------------------------------------------------------------
 
-// Reads one character in UTF-8 starting at |*begin| in |str| and places
-// the decoded value into |*code_point|. If the character is valid, we will
-// return true. If invalid, we'll return false and put the
-// kUnicodeReplacementCharacter into |*code_point|.
+// Reads one character in UTF-8 starting at |*begin| in |str|, places
+// the decoded value into |*code_point|, and returns true on success.
+// Otherwise, we'll return false and put the kUnicodeReplacementCharacter
+// into |*code_point|.
 //
 // |*begin| will be updated to point to the last character consumed so it
 // can be incremented in a loop and will be ready for the next character.
 // (for a single-byte ASCII character, it will not be changed).
 COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const char* str,
-                 size_t* begin,
-                 size_t length,
-                 base_icu::UChar32* code_point_out);
+bool ReadUTFCharLossy(const char* str,
+                      size_t* begin,
+                      size_t length,
+                      base_icu::UChar32* code_point_out);
 
 // Generic To-UTF-8 converter. This will call the given append method for each
 // character that should be appended, with the given output method. Wrappers
@@ -216,19 +216,19 @@ inline void AppendUTF8EscapedValue(base_icu::UChar32 char_value,
 
 // UTF-16 functions -----------------------------------------------------------
 
-// Reads one character in UTF-16 starting at |*begin| in |str| and places
-// the decoded value into |*code_point|. If the character is valid, we will
-// return true. If invalid, we'll return false and put the
-// kUnicodeReplacementCharacter into |*code_point|.
+// Reads one character in UTF-16 starting at |*begin| in |str|, places
+// the decoded value into |*code_point|, and returns true on success.
+// Otherwise, we'll return false and put the kUnicodeReplacementCharacter
+// into |*code_point|.
 //
 // |*begin| will be updated to point to the last character consumed so it
 // can be incremented in a loop and will be ready for the next character.
 // (for a single-16-bit-word character, it will not be changed).
 COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const char16_t* str,
-                 size_t* begin,
-                 size_t length,
-                 base_icu::UChar32* code_point_out);
+bool ReadUTFCharLossy(const char16_t* str,
+                      size_t* begin,
+                      size_t length,
+                      base_icu::UChar32* code_point_out);
 
 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
 inline void AppendUTF16Value(base_icu::UChar32 code_point,
@@ -266,11 +266,11 @@ inline bool AppendUTF8EscapedChar(const char16_t* str,
                                   size_t* begin,
                                   size_t length,
                                   CanonOutput* output) {
-  // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
-  // us the kUnicodeReplacementCharacter, so we don't have to do special
+  // UTF-16 input. ReadUTFCharLossy will handle invalid characters for us and
+  // give us the kUnicodeReplacementCharacter, so we don't have to do special
   // checking after failure, just pass through the failure to the caller.
   base_icu::UChar32 char_value;
-  bool success = ReadUTFChar(str, begin, length, &char_value);
+  bool success = ReadUTFCharLossy(str, begin, length, &char_value);
   AppendUTF8EscapedValue(char_value, output);
   return success;
 }
@@ -280,11 +280,11 @@ inline bool AppendUTF8EscapedChar(const char* str,
                                   size_t* begin,
                                   size_t length,
                                   CanonOutput* output) {
-  // ReadUTF8Char will handle invalid characters for us and give us the
+  // ReadUTFCharLossy will handle invalid characters for us and give us the
   // kUnicodeReplacementCharacter, so we don't have to do special checking
   // after failure, just pass through the failure to the caller.
   base_icu::UChar32 ch;
-  bool success = ReadUTFChar(str, begin, length, &ch);
+  bool success = ReadUTFCharLossy(str, begin, length, &ch);
   AppendUTF8EscapedValue(ch, output);
   return success;
 }
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 676468d5d..2cd84b409 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -6,6 +6,7 @@
 
 #include "base/check.h"
 #include "base/check_op.h"
+#include "base/metrics/histogram_functions.h"
 #include "third_party/abseil-cpp/absl/types/optional.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
@@ -37,25 +38,24 @@ enum CharacterFlags {
   // ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these
   // characters unescaped, they should just be copied.
   UNESCAPE = 4,
-
-  // This character is disallowed in URLs. Note that the "special" bit is also
-  // set to trigger handling.
-  INVALID_BIT = 8,
-  INVALID = INVALID_BIT | SPECIAL,
 };
 
 // This table contains one of the above flag values. Note some flags are more
 // than one bits because they also turn on the "special" flag. Special is the
 // only flag that may be combined with others.
 //
-// This table is designed to match exactly what IE does with the characters.
+// This table was used to be designed to match exactly what IE did with the
+// characters, however, which doesn't comply with the URL Standard as of Jun
+// 2023. See http://crbug.com/1400251 and http://crbug.com/1252531 for efforts
+// to comply with the URL Standard.
 //
 // Dot is even more special, and the escaped version is handled specially by
 // IsDot. Therefore, we don't need the "escape" flag, and even the "unescape"
 // bit is never handled (we just need the "special") bit.
+// clang-format off
 const unsigned char kPathCharLookup[0x100] = {
 //   NULL     control chars...
-     INVALID, ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE , ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
 //   control chars...
      ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
 //   ' '      !        "        #        $        %        &        '        (        )        *        +        ,        -        .        /
@@ -79,6 +79,7 @@ const unsigned char kPathCharLookup[0x100] = {
      ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
      ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
      ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE};
+// clang-format on
 
 enum DotDisposition {
   // The given dot is just part of a filename and is not special.
@@ -266,6 +267,7 @@ bool DoPartialPathInternal(const CHAR* spec,
   absl::optional<size_t> last_invalid_percent_index;
 
   bool success = true;
+  bool unescape_escaped_char = false;
   for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
     UCHAR uch = static_cast<UCHAR>(spec[i]);
     if (sizeof(CHAR) > 1 && uch >= 0x80) {
@@ -333,6 +335,8 @@ bool DoPartialPathInternal(const CHAR* spec,
 
             if (unescaped_flags & UNESCAPE) {
               // This escaped value shouldn't be escaped.  Try to copy it.
+              unescape_escaped_char = true;
+
               output->push_back(unescaped_value);
               // If we just unescaped a value within 2 output characters of the
               // '%' from a previously-detected invalid escape sequence, we
@@ -354,8 +358,6 @@ bool DoPartialPathInternal(const CHAR* spec,
               output->push_back('%');
               output->push_back(static_cast<char>(spec[i - 1]));
               output->push_back(static_cast<char>(spec[i]));
-              if (unescaped_flags & INVALID_BIT)
-                success = false;
             }
           } else {
             // Invalid escape sequence. IE7+ rejects any URLs with such
@@ -366,12 +368,6 @@ bool DoPartialPathInternal(const CHAR* spec,
             last_invalid_percent_index = output->length();
             output->push_back('%');
           }
-
-        } else if (flags & INVALID_BIT) {
-          // For NULLs, etc. fail.
-          AppendEscapedChar(out_ch, output);
-          success = false;
-
         } else if (flags & ESCAPE_BIT) {
           // This character should be escaped.
           AppendEscapedChar(out_ch, output);
@@ -382,6 +378,8 @@ bool DoPartialPathInternal(const CHAR* spec,
       }
     }
   }
+  base::UmaHistogramBoolean("URL.Path.UnescapeEscapedChar",
+                            unescape_escaped_char);
   return success;
 }
 
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index dee00d86e..0fe0cb34f 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -10,6 +10,7 @@
 #include "base/strings/string_piece.h"
 #include "base/strings/utf_string_conversions.h"
 #include "base/test/gtest_util.h"
+#include "base/test/metrics/histogram_tester.h"
 #include "base/test/scoped_feature_list.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "url/third_party/mozilla/url_parse.h"
@@ -335,13 +336,10 @@ TEST_P(URLCanonHostTest, Host) {
        L"bar.com",
        "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
       // Invalid unicode characters should fail...
-      // ...In wide input, ICU will barf and we'll end up with the input as
-      //    escaped UTF-8 (the invalid character should be replaced with the
-      //    replacement character).
-      {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com",
+      {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%B7%90zyx.com",
        Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
       // ...This is the same as previous but with with escaped.
-      {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com",
+      {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%B7%90zyx.com",
        Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
       // Test name prepping, fullwidth input should be converted to ASCII and
       // NOT
@@ -1083,7 +1081,7 @@ TEST(URLCanonTest, CanonicalizeHostSubstring) {
         test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(),
         Component(0, 8), &output));
     output.Complete();
-    EXPECT_EQ("%EF%BF%BDzyx.com", out_str);
+    EXPECT_EQ("%EF%B7%90zyx.com", out_str);
   }
 
   // Should return true for empty input strings.
@@ -1282,8 +1280,8 @@ DualComponentCase kCommonPathCases[] = {
     // Funny characters that are unescaped should be escaped
     {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
     {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
-    // Invalid characters that are escaped should cause a failure.
-    {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
+    // %00 should not cause failures.
+    {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), true},
     // Some characters should be passed through unchanged regardless of esc.
     {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
      true},
@@ -1325,10 +1323,9 @@ DualComponentCase kCommonPathCases[] = {
     {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
      L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
      Component(0, 37), true},
-    // Invalid unicode characters should fail. We only do validation on
-    // UTF-16 input, so this doesn't happen on 8-bit.
+    // Unicode Noncharacter (U+FDD0) should not fail.
     {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
-    {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+    {nullptr, L"/\xfdd0zyx", "/%EF%B7%90zyx", Component(0, 13), true},
 };
 
 typedef bool (*CanonFunc8Bit)(const char*,
@@ -1390,7 +1387,7 @@ TEST(URLCanonTest, Path) {
              CanonicalizePath);
 
   // Manual test: embedded NULLs should be escaped and the URL should be marked
-  // as invalid.
+  // as valid.
   const char path_with_null[] = "/ab\0c";
   Component in_comp(0, 5);
   Component out_comp;
@@ -1399,7 +1396,7 @@ TEST(URLCanonTest, Path) {
   StdStringCanonOutput output(&out_str);
   bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp);
   output.Complete();
-  EXPECT_FALSE(success);
+  EXPECT_TRUE(success);
   EXPECT_EQ("/ab%00c", out_str);
 }
 
@@ -1507,7 +1504,7 @@ TEST(URLCanonTest, Ref) {
       {"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true},
       {nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true},
       // Test a Unicode invalid character.
-      {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true},
+      {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%B7%90", Component(1, 10), true},
       // Refs can have # signs and we should preserve them.
       {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true},
       {"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
@@ -2129,9 +2126,9 @@ TEST(URLCanonTest, CanonicalizePathURL) {
       {"JavaScript:Foo", "javascript:Foo"},
       {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
 
-      // Validation errors should not cause failure. See
+      // Unicode invalid characters should not cause failure. See
       // https://crbug.com/925614.
-      {"javascript:\uFFFF", "javascript:%EF%BF%BD"},
+      {"javascript:\uFFFF", "javascript:%EF%BF%BF"},
   };
 
   for (size_t i = 0; i < std::size(path_cases); i++) {
@@ -2169,7 +2166,7 @@ TEST(URLCanonTest, CanonicalizePathURLPath) {
       {"Foo", L"Foo", "Foo"},
       {"\":This /is interesting;?#", L"\":This /is interesting;?#",
        "\":This /is interesting;?#"},
-      {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+      {"\uFFFF", L"\uFFFF", "%EF%BF%BF"},
   };
 
   for (size_t i = 0; i < std::size(path_cases); i++) {
@@ -2745,4 +2742,27 @@ TEST(URLCanonTest, IDNToASCII) {
   output.set_length(0);
 }
 
+TEST(URLCanonTest, UnescapePathCharHistogram) {
+  struct TestCase {
+    base::StringPiece path;
+    base::HistogramBase::Count cnt;
+  } cases[] = {
+      {"/a", 0},
+      {"/%61", 1},
+      {"/%61%61", 1},
+  };
+
+  for (const auto& c : cases) {
+    base::HistogramTester histogram_tester;
+    Component in_comp(0, c.path.size());
+    Component out_comp;
+    std::string out_str;
+    StdStringCanonOutput output(&out_str);
+    bool success = CanonicalizePath(c.path.data(), in_comp, &output, &out_comp);
+    ASSERT_TRUE(success);
+    histogram_tester.ExpectBucketCount("URL.Path.UnescapeEscapedChar", 1,
+                                       c.cnt);
+  }
+}
+
 }  // namespace url
diff --git a/url/url_features.cc b/url/url_features.cc
index 8f38ff257..858e82790 100644
--- a/url/url_features.cc
+++ b/url/url_features.cc
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 #include "url/url_features.h"
+#include "base/feature_list.h"
 
 namespace url {
 
@@ -17,7 +18,7 @@ BASE_FEATURE(kRecordIDNA2008Metrics,
 
 BASE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing,
              "StrictIPv4EmbeddedIPv6AddressParsing",
-             base::FEATURE_DISABLED_BY_DEFAULT);
+             base::FEATURE_ENABLED_BY_DEFAULT);
 
 // Kill switch for crbug.com/1220361.
 BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
@@ -25,6 +26,13 @@ BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
              base::FEATURE_ENABLED_BY_DEFAULT);
 
 bool IsUsingIDNA2008NonTransitional() {
+  // If the FeatureList isn't available yet, fall back to the feature's default
+  // state. This may happen during early startup, see crbug.com/1441956.
+  if (!base::FeatureList::GetInstance()) {
+    return kUseIDNA2008NonTransitional.default_state ==
+           base::FEATURE_ENABLED_BY_DEFAULT;
+  }
+
   return base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
 }
 
diff --git a/url/url_util.cc b/url/url_util.cc
index 67913eb72..001c50e72 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -885,8 +885,8 @@ void DecodeURLEscapeSequences(const char* input,
       // character.
       size_t next_character = i;
       base_icu::UChar32 code_point;
-      if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
-                      &code_point)) {
+      if (ReadUTFCharLossy(unescaped_chars.data(), &next_character,
+                           unescaped_length, &code_point)) {
         // Valid UTF-8 character, convert to UTF-16.
         AppendUTF16Value(code_point, output);
         i = next_character;
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index e1d7801b0..3a2e6e6d5 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -247,6 +247,13 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
       {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
        "pqrstuvwxyz{|}~\x7f/"},
       {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+      // U+FFFF (Noncharacter) should not be replaced with U+FFFD (Replacement
+      // Character) (http://crbug.com/1416021)
+      {"%ef%bf%bf", "\xef\xbf\xbf"},
+      // U+FDD0 (Noncharacter)
+      {"%ef%b7%90", "\xef\xb7\x90"},
+      // U+FFFD (Replacement Character)
+      {"%ef%bf%bd", "\xef\xbf\xbd"},
   };
 
   for (size_t i = 0; i < std::size(decode_cases); i++) {
author	Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com>	2023-08-14 17:15:38 +0000
committer	Mohannad Farrag <aymanm@google.com>	2023-08-14 17:22:36 +0000
commit	ec3a8e8db24bb3ce4b078106b358ca1c4389c14f (patch)
tree	823f64849ad509483bfebb2252199a5fe79b8e43 /url
parent	d12afe756882b2521faa0b33cbd4813fcea04c22 (diff)
download	cronet-ec3a8e8db24bb3ce4b078106b358ca1c4389c14f.tar.gz