diff options
author | Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com> | 2023-08-14 17:15:38 +0000 |
---|---|---|
committer | Mohannad Farrag <aymanm@google.com> | 2023-08-14 17:22:36 +0000 |
commit | ec3a8e8db24bb3ce4b078106b358ca1c4389c14f (patch) | |
tree | 823f64849ad509483bfebb2252199a5fe79b8e43 /url | |
parent | d12afe756882b2521faa0b33cbd4813fcea04c22 (diff) | |
download | cronet-ec3a8e8db24bb3ce4b078106b358ca1c4389c14f.tar.gz |
Import Cronet version 117.0.5938.0
Project import generated by Copybara.
FolderOrigin-RevId: /tmp/copybara-origin/src
Change-Id: Ib7683d0ed240e11ed9068152600c8092afba4571
Diffstat (limited to 'url')
-rw-r--r-- | url/BUILD.gn | 32 | ||||
-rw-r--r-- | url/android/java/src/org/chromium/url/GURL.java | 4 | ||||
-rw-r--r-- | url/android/java/src/org/chromium/url/Parsed.java | 2 | ||||
-rw-r--r-- | url/android/origin_android.cc | 2 | ||||
-rw-r--r-- | url/gurl.cc | 5 | ||||
-rw-r--r-- | url/third_party/mozilla/README.chromium | 6 | ||||
-rw-r--r-- | url/url_canon_internal.cc | 33 | ||||
-rw-r--r-- | url/url_canon_internal.h | 42 | ||||
-rw-r--r-- | url/url_canon_path.cc | 28 | ||||
-rw-r--r-- | url/url_canon_unittest.cc | 54 | ||||
-rw-r--r-- | url/url_features.cc | 10 | ||||
-rw-r--r-- | url/url_util.cc | 4 | ||||
-rw-r--r-- | url/url_util_unittest.cc | 7 |
13 files changed, 134 insertions, 95 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn index b5d6f6063..8f5fea125 100644 --- a/url/BUILD.gn +++ b/url/BUILD.gn @@ -7,6 +7,7 @@ import("//testing/libfuzzer/fuzzer_test.gni") import("//testing/test.gni") import("features.gni") +import("//build/config/android/jni.gni") import("//build/config/cronet/config.gni") if (is_android || is_robolectric) { @@ -103,10 +104,11 @@ component("url") { if (is_android || is_robolectric) { generate_jni("url_jni_headers") { - sources = [ - "android/java/src/org/chromium/url/IDNStringUtil.java", - "android/java/src/org/chromium/url/Origin.java", - ] + sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ] + } + + generate_jni("origin_jni_headers") { + sources = [ "android/java/src/org/chromium/url/Origin.java" ] } generate_jni("gurl_jni_headers") { @@ -141,8 +143,8 @@ if (is_android || is_robolectric) { deps = [ ":gurl_android", + ":origin_jni_headers", ":url", - ":url_jni_headers", "//base", ] } @@ -157,6 +159,7 @@ if (is_android) { if (is_android && !is_cronet_build) { android_library("gurl_java") { + srcjar_deps = [ ":gurl_jni_headers" ] sources = [ "android/java/src/org/chromium/url/GURL.java", "android/java/src/org/chromium/url/Parsed.java", @@ -170,10 +173,10 @@ if (is_android && !is_cronet_build) { "//third_party/androidx:androidx_annotation_annotation_java", "//url/mojom:url_mojom_gurl_java", ] - annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ] } android_library("origin_java") { + srcjar_deps = [ ":origin_jni_headers" ] sources = [ "android/java/src/org/chromium/url/Origin.java" ] deps = [ ":gurl_java", @@ -183,7 +186,6 @@ if (is_android && !is_cronet_build) { "//mojo/public/mojom/base:base_java", "//url/mojom:url_mojom_origin_java", ] - annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ] } } @@ -307,7 +309,8 @@ if (is_android && !is_cronet_build) { android_library("android_test_helper_java") { testonly = true - annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ] + + srcjar_deps = [ ":j_test_jni_headers" ] sources = [ "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java", "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java", @@ -362,7 +365,6 @@ if (is_android && !is_cronet_build) { "//url/mojom:url_mojom_gurl_java", "//url/mojom:url_mojom_origin_java", ] - annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ] } # See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we @@ -389,10 +391,12 @@ if (is_android && !is_cronet_build) { } } -if (is_robolectric) { - # Use this in robolectric_binary() targets if you just need GURL and //base - # functionality. Otherwise, define a custom shared_library(). - shared_library("libgurl_robolectric") { +# Use this in robolectric_binary() targets if you just need GURL and //base +# functionality. Otherwise, define a custom shared_library(). +if (!is_cronet_build && target_os == "android") { + shared_library_with_jni("libgurl_robolectric") { + testonly = true + enable_target = is_robolectric sources = [ "android/robolectric_test_main.cc" ] deps = [ "//base", @@ -401,5 +405,7 @@ if (is_robolectric) { # Make jni.h available. configs += [ "//third_party/jdk" ] + + java_targets = [ "//chrome/android:chrome_junit_tests" ] } } diff --git a/url/android/java/src/org/chromium/url/GURL.java b/url/android/java/src/org/chromium/url/GURL.java index 34bd92495..09698e4bb 100644 --- a/url/android/java/src/org/chromium/url/GURL.java +++ b/url/android/java/src/org/chromium/url/GURL.java @@ -8,7 +8,6 @@ import android.os.SystemClock; import android.text.TextUtils; import androidx.annotation.Nullable; -import androidx.annotation.VisibleForTesting; import com.google.errorprone.annotations.DoNotMock; @@ -21,7 +20,6 @@ import org.chromium.base.library_loader.LibraryLoader; import org.chromium.base.metrics.RecordHistogram; import org.chromium.base.task.PostTask; import org.chromium.base.task.TaskTraits; -import org.chromium.build.annotations.MainDex; import org.chromium.url.mojom.Url; import org.chromium.url.mojom.UrlConstants; @@ -39,7 +37,6 @@ import java.util.Random; * reconstruct a GURL in Java, allowing it to be much faster in the common case and easier to use. */ @JNINamespace("url") -@MainDex @DoNotMock("Create a real instance instead. For Robolectric, see JUnitTestGURLs.java") public class GURL { private static final String TAG = "GURL"; @@ -371,7 +368,6 @@ public class GURL { } /** Inits this GURL with the internal state of another GURL. */ - @VisibleForTesting /* package */ void initForTesting(GURL gurl) { init(gurl.mSpec, gurl.mIsValid, gurl.mParsed); } diff --git a/url/android/java/src/org/chromium/url/Parsed.java b/url/android/java/src/org/chromium/url/Parsed.java index ca41cfb1f..75d12cb9a 100644 --- a/url/android/java/src/org/chromium/url/Parsed.java +++ b/url/android/java/src/org/chromium/url/Parsed.java @@ -7,12 +7,10 @@ package org.chromium.url; import org.chromium.base.annotations.CalledByNative; import org.chromium.base.annotations.JNINamespace; import org.chromium.base.annotations.NativeMethods; -import org.chromium.build.annotations.MainDex; /** * A java wrapper for Parsed, GURL's internal parsed URI representation. */ -@MainDex @JNINamespace("url") /* package */ class Parsed { /* package */ final int mSchemeBegin; diff --git a/url/android/origin_android.cc b/url/android/origin_android.cc index a0dd271b5..a75a9a369 100644 --- a/url/android/origin_android.cc +++ b/url/android/origin_android.cc @@ -11,7 +11,7 @@ #include "base/android/scoped_java_ref.h" #include "base/memory/ptr_util.h" #include "url/android/gurl_android.h" -#include "url/url_jni_headers/Origin_jni.h" +#include "url/origin_jni_headers/Origin_jni.h" namespace url { diff --git a/url/gurl.cc b/url/gurl.cc index 6930f73b6..9f2e5fad6 100644 --- a/url/gurl.cc +++ b/url/gurl.cc @@ -13,6 +13,7 @@ #include "base/check_op.h" #include "base/no_destructor.h" +#include "base/notreached.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/trace_event/base_tracing.h" @@ -158,7 +159,9 @@ const std::string& GURL::spec() const { if (is_valid_ || spec_.empty()) return spec_; - DCHECK(false) << "Trying to get the spec of an invalid URL!"; + // TODO(crbug.com/851128): Make sure this no longer hits before making + // NOTREACHED_NORETURN(); + NOTREACHED() << "Trying to get the spec of an invalid URL!"; return base::EmptyString(); } diff --git a/url/third_party/mozilla/README.chromium b/url/third_party/mozilla/README.chromium index ef396d3d1..9df199239 100644 --- a/url/third_party/mozilla/README.chromium +++ b/url/third_party/mozilla/README.chromium @@ -1,7 +1,11 @@ Name: url_parse -URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp +Version: unknown +URL: https://hg.mozilla.org/mozilla-central/file/tip/netwerk/base/nsURLParsers.cpp License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1 License File: LICENSE.txt +Shipped: yes +Security Critical: yes +CPEPrefix: unknown Description: diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc index f6219209e..574a5ed0d 100644 --- a/url/url_canon_internal.cc +++ b/url/url_canon_internal.cc @@ -81,10 +81,11 @@ void DoAppendStringOfType(const CHAR* source, } for (; i < length; i++) { if (static_cast<UCHAR>(source[i]) >= 0x80) { - // ReadChar will fill the code point with kUnicodeReplacementCharacter - // when the input is invalid, which is what we want. + // ReadUTFCharLossy will fill the code point with + // kUnicodeReplacementCharacter when the input is invalid, which is what + // we want. base_icu::UChar32 code_point; - ReadUTFChar(source, &i, length, &code_point); + ReadUTFCharLossy(source, &i, length, &code_point); AppendUTF8EscapedValue(code_point, output); } else { // Just append the 7-bit character, possibly escaping it. @@ -312,24 +313,22 @@ void AppendStringOfType(const char16_t* source, DoAppendStringOfType<char16_t, char16_t>(source, length, type, output); } -bool ReadUTFChar(const char* str, - size_t* begin, - size_t length, - base_icu::UChar32* code_point_out) { - if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) || - !base::IsValidCharacter(*code_point_out)) { +bool ReadUTFCharLossy(const char* str, + size_t* begin, + size_t length, + base_icu::UChar32* code_point_out) { + if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) { *code_point_out = kUnicodeReplacementCharacter; return false; } return true; } -bool ReadUTFChar(const char16_t* str, - size_t* begin, - size_t length, - base_icu::UChar32* code_point_out) { - if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) || - !base::IsValidCharacter(*code_point_out)) { +bool ReadUTFCharLossy(const char16_t* str, + size_t* begin, + size_t length, + base_icu::UChar32* code_point_out) { + if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) { *code_point_out = kUnicodeReplacementCharacter; return false; } @@ -356,7 +355,7 @@ bool ConvertUTF16ToUTF8(const char16_t* input, bool success = true; for (size_t i = 0; i < input_len; i++) { base_icu::UChar32 code_point; - success &= ReadUTFChar(input, &i, input_len, &code_point); + success &= ReadUTFCharLossy(input, &i, input_len, &code_point); AppendUTF8Value(code_point, output); } return success; @@ -368,7 +367,7 @@ bool ConvertUTF8ToUTF16(const char* input, bool success = true; for (size_t i = 0; i < input_len; i++) { base_icu::UChar32 code_point; - success &= ReadUTFChar(input, &i, input_len, &code_point); + success &= ReadUTFCharLossy(input, &i, input_len, &code_point); AppendUTF16Value(code_point, output); } return success; diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h index 13481f5fd..0f1400d56 100644 --- a/url/url_canon_internal.h +++ b/url/url_canon_internal.h @@ -144,19 +144,19 @@ extern const base_icu::UChar32 kUnicodeReplacementCharacter; // UTF-8 functions ------------------------------------------------------------ -// Reads one character in UTF-8 starting at |*begin| in |str| and places -// the decoded value into |*code_point|. If the character is valid, we will -// return true. If invalid, we'll return false and put the -// kUnicodeReplacementCharacter into |*code_point|. +// Reads one character in UTF-8 starting at |*begin| in |str|, places +// the decoded value into |*code_point|, and returns true on success. +// Otherwise, we'll return false and put the kUnicodeReplacementCharacter +// into |*code_point|. // // |*begin| will be updated to point to the last character consumed so it // can be incremented in a loop and will be ready for the next character. // (for a single-byte ASCII character, it will not be changed). COMPONENT_EXPORT(URL) -bool ReadUTFChar(const char* str, - size_t* begin, - size_t length, - base_icu::UChar32* code_point_out); +bool ReadUTFCharLossy(const char* str, + size_t* begin, + size_t length, + base_icu::UChar32* code_point_out); // Generic To-UTF-8 converter. This will call the given append method for each // character that should be appended, with the given output method. Wrappers @@ -216,19 +216,19 @@ inline void AppendUTF8EscapedValue(base_icu::UChar32 char_value, // UTF-16 functions ----------------------------------------------------------- -// Reads one character in UTF-16 starting at |*begin| in |str| and places -// the decoded value into |*code_point|. If the character is valid, we will -// return true. If invalid, we'll return false and put the -// kUnicodeReplacementCharacter into |*code_point|. +// Reads one character in UTF-16 starting at |*begin| in |str|, places +// the decoded value into |*code_point|, and returns true on success. +// Otherwise, we'll return false and put the kUnicodeReplacementCharacter +// into |*code_point|. // // |*begin| will be updated to point to the last character consumed so it // can be incremented in a loop and will be ready for the next character. // (for a single-16-bit-word character, it will not be changed). COMPONENT_EXPORT(URL) -bool ReadUTFChar(const char16_t* str, - size_t* begin, - size_t length, - base_icu::UChar32* code_point_out); +bool ReadUTFCharLossy(const char16_t* str, + size_t* begin, + size_t length, + base_icu::UChar32* code_point_out); // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. inline void AppendUTF16Value(base_icu::UChar32 code_point, @@ -266,11 +266,11 @@ inline bool AppendUTF8EscapedChar(const char16_t* str, size_t* begin, size_t length, CanonOutput* output) { - // UTF-16 input. ReadUTFChar will handle invalid characters for us and give - // us the kUnicodeReplacementCharacter, so we don't have to do special + // UTF-16 input. ReadUTFCharLossy will handle invalid characters for us and + // give us the kUnicodeReplacementCharacter, so we don't have to do special // checking after failure, just pass through the failure to the caller. base_icu::UChar32 char_value; - bool success = ReadUTFChar(str, begin, length, &char_value); + bool success = ReadUTFCharLossy(str, begin, length, &char_value); AppendUTF8EscapedValue(char_value, output); return success; } @@ -280,11 +280,11 @@ inline bool AppendUTF8EscapedChar(const char* str, size_t* begin, size_t length, CanonOutput* output) { - // ReadUTF8Char will handle invalid characters for us and give us the + // ReadUTFCharLossy will handle invalid characters for us and give us the // kUnicodeReplacementCharacter, so we don't have to do special checking // after failure, just pass through the failure to the caller. base_icu::UChar32 ch; - bool success = ReadUTFChar(str, begin, length, &ch); + bool success = ReadUTFCharLossy(str, begin, length, &ch); AppendUTF8EscapedValue(ch, output); return success; } diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc index 676468d5d..2cd84b409 100644 --- a/url/url_canon_path.cc +++ b/url/url_canon_path.cc @@ -6,6 +6,7 @@ #include "base/check.h" #include "base/check_op.h" +#include "base/metrics/histogram_functions.h" #include "third_party/abseil-cpp/absl/types/optional.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" @@ -37,25 +38,24 @@ enum CharacterFlags { // ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these // characters unescaped, they should just be copied. UNESCAPE = 4, - - // This character is disallowed in URLs. Note that the "special" bit is also - // set to trigger handling. - INVALID_BIT = 8, - INVALID = INVALID_BIT | SPECIAL, }; // This table contains one of the above flag values. Note some flags are more // than one bits because they also turn on the "special" flag. Special is the // only flag that may be combined with others. // -// This table is designed to match exactly what IE does with the characters. +// This table was used to be designed to match exactly what IE did with the +// characters, however, which doesn't comply with the URL Standard as of Jun +// 2023. See http://crbug.com/1400251 and http://crbug.com/1252531 for efforts +// to comply with the URL Standard. // // Dot is even more special, and the escaped version is handled specially by // IsDot. Therefore, we don't need the "escape" flag, and even the "unescape" // bit is never handled (we just need the "special") bit. +// clang-format off const unsigned char kPathCharLookup[0x100] = { // NULL control chars... - INVALID, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, + ESCAPE , ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, // control chars... ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, // ' ' ! " # $ % & ' ( ) * + , - . / @@ -79,6 +79,7 @@ const unsigned char kPathCharLookup[0x100] = { ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE}; +// clang-format on enum DotDisposition { // The given dot is just part of a filename and is not special. @@ -266,6 +267,7 @@ bool DoPartialPathInternal(const CHAR* spec, absl::optional<size_t> last_invalid_percent_index; bool success = true; + bool unescape_escaped_char = false; for (size_t i = static_cast<size_t>(path.begin); i < end; i++) { UCHAR uch = static_cast<UCHAR>(spec[i]); if (sizeof(CHAR) > 1 && uch >= 0x80) { @@ -333,6 +335,8 @@ bool DoPartialPathInternal(const CHAR* spec, if (unescaped_flags & UNESCAPE) { // This escaped value shouldn't be escaped. Try to copy it. + unescape_escaped_char = true; + output->push_back(unescaped_value); // If we just unescaped a value within 2 output characters of the // '%' from a previously-detected invalid escape sequence, we @@ -354,8 +358,6 @@ bool DoPartialPathInternal(const CHAR* spec, output->push_back('%'); output->push_back(static_cast<char>(spec[i - 1])); output->push_back(static_cast<char>(spec[i])); - if (unescaped_flags & INVALID_BIT) - success = false; } } else { // Invalid escape sequence. IE7+ rejects any URLs with such @@ -366,12 +368,6 @@ bool DoPartialPathInternal(const CHAR* spec, last_invalid_percent_index = output->length(); output->push_back('%'); } - - } else if (flags & INVALID_BIT) { - // For NULLs, etc. fail. - AppendEscapedChar(out_ch, output); - success = false; - } else if (flags & ESCAPE_BIT) { // This character should be escaped. AppendEscapedChar(out_ch, output); @@ -382,6 +378,8 @@ bool DoPartialPathInternal(const CHAR* spec, } } } + base::UmaHistogramBoolean("URL.Path.UnescapeEscapedChar", + unescape_escaped_char); return success; } diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index dee00d86e..0fe0cb34f 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc @@ -10,6 +10,7 @@ #include "base/strings/string_piece.h" #include "base/strings/utf_string_conversions.h" #include "base/test/gtest_util.h" +#include "base/test/metrics/histogram_tester.h" #include "base/test/scoped_feature_list.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/third_party/mozilla/url_parse.h" @@ -335,13 +336,10 @@ TEST_P(URLCanonHostTest, Host) { L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""}, // Invalid unicode characters should fail... - // ...In wide input, ICU will barf and we'll end up with the input as - // escaped UTF-8 (the invalid character should be replaced with the - // replacement character). - {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", + {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%B7%90zyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, // ...This is the same as previous but with with escaped. - {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", + {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%B7%90zyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, // Test name prepping, fullwidth input should be converted to ASCII and // NOT @@ -1083,7 +1081,7 @@ TEST(URLCanonTest, CanonicalizeHostSubstring) { test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(), Component(0, 8), &output)); output.Complete(); - EXPECT_EQ("%EF%BF%BDzyx.com", out_str); + EXPECT_EQ("%EF%B7%90zyx.com", out_str); } // Should return true for empty input strings. @@ -1282,8 +1280,8 @@ DualComponentCase kCommonPathCases[] = { // Funny characters that are unescaped should be escaped {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true}, {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, - // Invalid characters that are escaped should cause a failure. - {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, + // %00 should not cause failures. + {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), true}, // Some characters should be passed through unchanged regardless of esc. {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), true}, @@ -1325,10 +1323,9 @@ DualComponentCase kCommonPathCases[] = { {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", Component(0, 37), true}, - // Invalid unicode characters should fail. We only do validation on - // UTF-16 input, so this doesn't happen on 8-bit. + // Unicode Noncharacter (U+FDD0) should not fail. {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true}, - {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, + {nullptr, L"/\xfdd0zyx", "/%EF%B7%90zyx", Component(0, 13), true}, }; typedef bool (*CanonFunc8Bit)(const char*, @@ -1390,7 +1387,7 @@ TEST(URLCanonTest, Path) { CanonicalizePath); // Manual test: embedded NULLs should be escaped and the URL should be marked - // as invalid. + // as valid. const char path_with_null[] = "/ab\0c"; Component in_comp(0, 5); Component out_comp; @@ -1399,7 +1396,7 @@ TEST(URLCanonTest, Path) { StdStringCanonOutput output(&out_str); bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp); output.Complete(); - EXPECT_FALSE(success); + EXPECT_TRUE(success); EXPECT_EQ("/ab%00c", out_str); } @@ -1507,7 +1504,7 @@ TEST(URLCanonTest, Ref) { {"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true}, {nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true}, // Test a Unicode invalid character. - {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true}, + {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%B7%90", Component(1, 10), true}, // Refs can have # signs and we should preserve them. {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true}, {"#asdf", L"#asdf", "##asdf", Component(1, 5), true}, @@ -2129,9 +2126,9 @@ TEST(URLCanonTest, CanonicalizePathURL) { {"JavaScript:Foo", "javascript:Foo"}, {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"}, - // Validation errors should not cause failure. See + // Unicode invalid characters should not cause failure. See // https://crbug.com/925614. - {"javascript:\uFFFF", "javascript:%EF%BF%BD"}, + {"javascript:\uFFFF", "javascript:%EF%BF%BF"}, }; for (size_t i = 0; i < std::size(path_cases); i++) { @@ -2169,7 +2166,7 @@ TEST(URLCanonTest, CanonicalizePathURLPath) { {"Foo", L"Foo", "Foo"}, {"\":This /is interesting;?#", L"\":This /is interesting;?#", "\":This /is interesting;?#"}, - {"\uFFFF", L"\uFFFF", "%EF%BF%BD"}, + {"\uFFFF", L"\uFFFF", "%EF%BF%BF"}, }; for (size_t i = 0; i < std::size(path_cases); i++) { @@ -2745,4 +2742,27 @@ TEST(URLCanonTest, IDNToASCII) { output.set_length(0); } +TEST(URLCanonTest, UnescapePathCharHistogram) { + struct TestCase { + base::StringPiece path; + base::HistogramBase::Count cnt; + } cases[] = { + {"/a", 0}, + {"/%61", 1}, + {"/%61%61", 1}, + }; + + for (const auto& c : cases) { + base::HistogramTester histogram_tester; + Component in_comp(0, c.path.size()); + Component out_comp; + std::string out_str; + StdStringCanonOutput output(&out_str); + bool success = CanonicalizePath(c.path.data(), in_comp, &output, &out_comp); + ASSERT_TRUE(success); + histogram_tester.ExpectBucketCount("URL.Path.UnescapeEscapedChar", 1, + c.cnt); + } +} + } // namespace url diff --git a/url/url_features.cc b/url/url_features.cc index 8f38ff257..858e82790 100644 --- a/url/url_features.cc +++ b/url/url_features.cc @@ -3,6 +3,7 @@ // found in the LICENSE file. #include "url/url_features.h" +#include "base/feature_list.h" namespace url { @@ -17,7 +18,7 @@ BASE_FEATURE(kRecordIDNA2008Metrics, BASE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing, "StrictIPv4EmbeddedIPv6AddressParsing", - base::FEATURE_DISABLED_BY_DEFAULT); + base::FEATURE_ENABLED_BY_DEFAULT); // Kill switch for crbug.com/1220361. BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical, @@ -25,6 +26,13 @@ BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical, base::FEATURE_ENABLED_BY_DEFAULT); bool IsUsingIDNA2008NonTransitional() { + // If the FeatureList isn't available yet, fall back to the feature's default + // state. This may happen during early startup, see crbug.com/1441956. + if (!base::FeatureList::GetInstance()) { + return kUseIDNA2008NonTransitional.default_state == + base::FEATURE_ENABLED_BY_DEFAULT; + } + return base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional); } diff --git a/url/url_util.cc b/url/url_util.cc index 67913eb72..001c50e72 100644 --- a/url/url_util.cc +++ b/url/url_util.cc @@ -885,8 +885,8 @@ void DecodeURLEscapeSequences(const char* input, // character. size_t next_character = i; base_icu::UChar32 code_point; - if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length, - &code_point)) { + if (ReadUTFCharLossy(unescaped_chars.data(), &next_character, + unescaped_length, &code_point)) { // Valid UTF-8 character, convert to UTF-16. AppendUTF16Value(code_point, output); i = next_character; diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc index e1d7801b0..3a2e6e6d5 100644 --- a/url/url_util_unittest.cc +++ b/url/url_util_unittest.cc @@ -247,6 +247,13 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) { {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", "pqrstuvwxyz{|}~\x7f/"}, {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"}, + // U+FFFF (Noncharacter) should not be replaced with U+FFFD (Replacement + // Character) (http://crbug.com/1416021) + {"%ef%bf%bf", "\xef\xbf\xbf"}, + // U+FDD0 (Noncharacter) + {"%ef%b7%90", "\xef\xb7\x90"}, + // U+FFFD (Replacement Character) + {"%ef%bf%bd", "\xef\xbf\xbd"}, }; for (size_t i = 0; i < std::size(decode_cases); i++) { |