summaryrefslogtreecommitdiff
path: root/url
diff options
context:
space:
mode:
authorCronet Mainline Eng <cronet-mainline-eng+copybara@google.com>2023-08-14 17:15:38 +0000
committerMohannad Farrag <aymanm@google.com>2023-08-14 17:22:36 +0000
commitec3a8e8db24bb3ce4b078106b358ca1c4389c14f (patch)
tree823f64849ad509483bfebb2252199a5fe79b8e43 /url
parentd12afe756882b2521faa0b33cbd4813fcea04c22 (diff)
downloadcronet-ec3a8e8db24bb3ce4b078106b358ca1c4389c14f.tar.gz
Import Cronet version 117.0.5938.0
Project import generated by Copybara. FolderOrigin-RevId: /tmp/copybara-origin/src Change-Id: Ib7683d0ed240e11ed9068152600c8092afba4571
Diffstat (limited to 'url')
-rw-r--r--url/BUILD.gn32
-rw-r--r--url/android/java/src/org/chromium/url/GURL.java4
-rw-r--r--url/android/java/src/org/chromium/url/Parsed.java2
-rw-r--r--url/android/origin_android.cc2
-rw-r--r--url/gurl.cc5
-rw-r--r--url/third_party/mozilla/README.chromium6
-rw-r--r--url/url_canon_internal.cc33
-rw-r--r--url/url_canon_internal.h42
-rw-r--r--url/url_canon_path.cc28
-rw-r--r--url/url_canon_unittest.cc54
-rw-r--r--url/url_features.cc10
-rw-r--r--url/url_util.cc4
-rw-r--r--url/url_util_unittest.cc7
13 files changed, 134 insertions, 95 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn
index b5d6f6063..8f5fea125 100644
--- a/url/BUILD.gn
+++ b/url/BUILD.gn
@@ -7,6 +7,7 @@ import("//testing/libfuzzer/fuzzer_test.gni")
import("//testing/test.gni")
import("features.gni")
+import("//build/config/android/jni.gni")
import("//build/config/cronet/config.gni")
if (is_android || is_robolectric) {
@@ -103,10 +104,11 @@ component("url") {
if (is_android || is_robolectric) {
generate_jni("url_jni_headers") {
- sources = [
- "android/java/src/org/chromium/url/IDNStringUtil.java",
- "android/java/src/org/chromium/url/Origin.java",
- ]
+ sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
+ }
+
+ generate_jni("origin_jni_headers") {
+ sources = [ "android/java/src/org/chromium/url/Origin.java" ]
}
generate_jni("gurl_jni_headers") {
@@ -141,8 +143,8 @@ if (is_android || is_robolectric) {
deps = [
":gurl_android",
+ ":origin_jni_headers",
":url",
- ":url_jni_headers",
"//base",
]
}
@@ -157,6 +159,7 @@ if (is_android) {
if (is_android && !is_cronet_build) {
android_library("gurl_java") {
+ srcjar_deps = [ ":gurl_jni_headers" ]
sources = [
"android/java/src/org/chromium/url/GURL.java",
"android/java/src/org/chromium/url/Parsed.java",
@@ -170,10 +173,10 @@ if (is_android && !is_cronet_build) {
"//third_party/androidx:androidx_annotation_annotation_java",
"//url/mojom:url_mojom_gurl_java",
]
- annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
}
android_library("origin_java") {
+ srcjar_deps = [ ":origin_jni_headers" ]
sources = [ "android/java/src/org/chromium/url/Origin.java" ]
deps = [
":gurl_java",
@@ -183,7 +186,6 @@ if (is_android && !is_cronet_build) {
"//mojo/public/mojom/base:base_java",
"//url/mojom:url_mojom_origin_java",
]
- annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
}
}
@@ -307,7 +309,8 @@ if (is_android && !is_cronet_build) {
android_library("android_test_helper_java") {
testonly = true
- annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+
+ srcjar_deps = [ ":j_test_jni_headers" ]
sources = [
"android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
"android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
@@ -362,7 +365,6 @@ if (is_android && !is_cronet_build) {
"//url/mojom:url_mojom_gurl_java",
"//url/mojom:url_mojom_origin_java",
]
- annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
}
# See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we
@@ -389,10 +391,12 @@ if (is_android && !is_cronet_build) {
}
}
-if (is_robolectric) {
- # Use this in robolectric_binary() targets if you just need GURL and //base
- # functionality. Otherwise, define a custom shared_library().
- shared_library("libgurl_robolectric") {
+# Use this in robolectric_binary() targets if you just need GURL and //base
+# functionality. Otherwise, define a custom shared_library().
+if (!is_cronet_build && target_os == "android") {
+ shared_library_with_jni("libgurl_robolectric") {
+ testonly = true
+ enable_target = is_robolectric
sources = [ "android/robolectric_test_main.cc" ]
deps = [
"//base",
@@ -401,5 +405,7 @@ if (is_robolectric) {
# Make jni.h available.
configs += [ "//third_party/jdk" ]
+
+ java_targets = [ "//chrome/android:chrome_junit_tests" ]
}
}
diff --git a/url/android/java/src/org/chromium/url/GURL.java b/url/android/java/src/org/chromium/url/GURL.java
index 34bd92495..09698e4bb 100644
--- a/url/android/java/src/org/chromium/url/GURL.java
+++ b/url/android/java/src/org/chromium/url/GURL.java
@@ -8,7 +8,6 @@ import android.os.SystemClock;
import android.text.TextUtils;
import androidx.annotation.Nullable;
-import androidx.annotation.VisibleForTesting;
import com.google.errorprone.annotations.DoNotMock;
@@ -21,7 +20,6 @@ import org.chromium.base.library_loader.LibraryLoader;
import org.chromium.base.metrics.RecordHistogram;
import org.chromium.base.task.PostTask;
import org.chromium.base.task.TaskTraits;
-import org.chromium.build.annotations.MainDex;
import org.chromium.url.mojom.Url;
import org.chromium.url.mojom.UrlConstants;
@@ -39,7 +37,6 @@ import java.util.Random;
* reconstruct a GURL in Java, allowing it to be much faster in the common case and easier to use.
*/
@JNINamespace("url")
-@MainDex
@DoNotMock("Create a real instance instead. For Robolectric, see JUnitTestGURLs.java")
public class GURL {
private static final String TAG = "GURL";
@@ -371,7 +368,6 @@ public class GURL {
}
/** Inits this GURL with the internal state of another GURL. */
- @VisibleForTesting
/* package */ void initForTesting(GURL gurl) {
init(gurl.mSpec, gurl.mIsValid, gurl.mParsed);
}
diff --git a/url/android/java/src/org/chromium/url/Parsed.java b/url/android/java/src/org/chromium/url/Parsed.java
index ca41cfb1f..75d12cb9a 100644
--- a/url/android/java/src/org/chromium/url/Parsed.java
+++ b/url/android/java/src/org/chromium/url/Parsed.java
@@ -7,12 +7,10 @@ package org.chromium.url;
import org.chromium.base.annotations.CalledByNative;
import org.chromium.base.annotations.JNINamespace;
import org.chromium.base.annotations.NativeMethods;
-import org.chromium.build.annotations.MainDex;
/**
* A java wrapper for Parsed, GURL's internal parsed URI representation.
*/
-@MainDex
@JNINamespace("url")
/* package */ class Parsed {
/* package */ final int mSchemeBegin;
diff --git a/url/android/origin_android.cc b/url/android/origin_android.cc
index a0dd271b5..a75a9a369 100644
--- a/url/android/origin_android.cc
+++ b/url/android/origin_android.cc
@@ -11,7 +11,7 @@
#include "base/android/scoped_java_ref.h"
#include "base/memory/ptr_util.h"
#include "url/android/gurl_android.h"
-#include "url/url_jni_headers/Origin_jni.h"
+#include "url/origin_jni_headers/Origin_jni.h"
namespace url {
diff --git a/url/gurl.cc b/url/gurl.cc
index 6930f73b6..9f2e5fad6 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -13,6 +13,7 @@
#include "base/check_op.h"
#include "base/no_destructor.h"
+#include "base/notreached.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/trace_event/base_tracing.h"
@@ -158,7 +159,9 @@ const std::string& GURL::spec() const {
if (is_valid_ || spec_.empty())
return spec_;
- DCHECK(false) << "Trying to get the spec of an invalid URL!";
+ // TODO(crbug.com/851128): Make sure this no longer hits before making
+ // NOTREACHED_NORETURN();
+ NOTREACHED() << "Trying to get the spec of an invalid URL!";
return base::EmptyString();
}
diff --git a/url/third_party/mozilla/README.chromium b/url/third_party/mozilla/README.chromium
index ef396d3d1..9df199239 100644
--- a/url/third_party/mozilla/README.chromium
+++ b/url/third_party/mozilla/README.chromium
@@ -1,7 +1,11 @@
Name: url_parse
-URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp
+Version: unknown
+URL: https://hg.mozilla.org/mozilla-central/file/tip/netwerk/base/nsURLParsers.cpp
License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1
License File: LICENSE.txt
+Shipped: yes
+Security Critical: yes
+CPEPrefix: unknown
Description:
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index f6219209e..574a5ed0d 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -81,10 +81,11 @@ void DoAppendStringOfType(const CHAR* source,
}
for (; i < length; i++) {
if (static_cast<UCHAR>(source[i]) >= 0x80) {
- // ReadChar will fill the code point with kUnicodeReplacementCharacter
- // when the input is invalid, which is what we want.
+ // ReadUTFCharLossy will fill the code point with
+ // kUnicodeReplacementCharacter when the input is invalid, which is what
+ // we want.
base_icu::UChar32 code_point;
- ReadUTFChar(source, &i, length, &code_point);
+ ReadUTFCharLossy(source, &i, length, &code_point);
AppendUTF8EscapedValue(code_point, output);
} else {
// Just append the 7-bit character, possibly escaping it.
@@ -312,24 +313,22 @@ void AppendStringOfType(const char16_t* source,
DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
-bool ReadUTFChar(const char* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out) {
- if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
- !base::IsValidCharacter(*code_point_out)) {
+bool ReadUTFCharLossy(const char* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out) {
+ if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
return false;
}
return true;
}
-bool ReadUTFChar(const char16_t* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out) {
- if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
- !base::IsValidCharacter(*code_point_out)) {
+bool ReadUTFCharLossy(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out) {
+ if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
return false;
}
@@ -356,7 +355,7 @@ bool ConvertUTF16ToUTF8(const char16_t* input,
bool success = true;
for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
- success &= ReadUTFChar(input, &i, input_len, &code_point);
+ success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
AppendUTF8Value(code_point, output);
}
return success;
@@ -368,7 +367,7 @@ bool ConvertUTF8ToUTF16(const char* input,
bool success = true;
for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
- success &= ReadUTFChar(input, &i, input_len, &code_point);
+ success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
AppendUTF16Value(code_point, output);
}
return success;
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 13481f5fd..0f1400d56 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -144,19 +144,19 @@ extern const base_icu::UChar32 kUnicodeReplacementCharacter;
// UTF-8 functions ------------------------------------------------------------
-// Reads one character in UTF-8 starting at |*begin| in |str| and places
-// the decoded value into |*code_point|. If the character is valid, we will
-// return true. If invalid, we'll return false and put the
-// kUnicodeReplacementCharacter into |*code_point|.
+// Reads one character in UTF-8 starting at |*begin| in |str|, places
+// the decoded value into |*code_point|, and returns true on success.
+// Otherwise, we'll return false and put the kUnicodeReplacementCharacter
+// into |*code_point|.
//
// |*begin| will be updated to point to the last character consumed so it
// can be incremented in a loop and will be ready for the next character.
// (for a single-byte ASCII character, it will not be changed).
COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const char* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out);
+bool ReadUTFCharLossy(const char* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out);
// Generic To-UTF-8 converter. This will call the given append method for each
// character that should be appended, with the given output method. Wrappers
@@ -216,19 +216,19 @@ inline void AppendUTF8EscapedValue(base_icu::UChar32 char_value,
// UTF-16 functions -----------------------------------------------------------
-// Reads one character in UTF-16 starting at |*begin| in |str| and places
-// the decoded value into |*code_point|. If the character is valid, we will
-// return true. If invalid, we'll return false and put the
-// kUnicodeReplacementCharacter into |*code_point|.
+// Reads one character in UTF-16 starting at |*begin| in |str|, places
+// the decoded value into |*code_point|, and returns true on success.
+// Otherwise, we'll return false and put the kUnicodeReplacementCharacter
+// into |*code_point|.
//
// |*begin| will be updated to point to the last character consumed so it
// can be incremented in a loop and will be ready for the next character.
// (for a single-16-bit-word character, it will not be changed).
COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const char16_t* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out);
+bool ReadUTFCharLossy(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
inline void AppendUTF16Value(base_icu::UChar32 code_point,
@@ -266,11 +266,11 @@ inline bool AppendUTF8EscapedChar(const char16_t* str,
size_t* begin,
size_t length,
CanonOutput* output) {
- // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
- // us the kUnicodeReplacementCharacter, so we don't have to do special
+ // UTF-16 input. ReadUTFCharLossy will handle invalid characters for us and
+ // give us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
base_icu::UChar32 char_value;
- bool success = ReadUTFChar(str, begin, length, &char_value);
+ bool success = ReadUTFCharLossy(str, begin, length, &char_value);
AppendUTF8EscapedValue(char_value, output);
return success;
}
@@ -280,11 +280,11 @@ inline bool AppendUTF8EscapedChar(const char* str,
size_t* begin,
size_t length,
CanonOutput* output) {
- // ReadUTF8Char will handle invalid characters for us and give us the
+ // ReadUTFCharLossy will handle invalid characters for us and give us the
// kUnicodeReplacementCharacter, so we don't have to do special checking
// after failure, just pass through the failure to the caller.
base_icu::UChar32 ch;
- bool success = ReadUTFChar(str, begin, length, &ch);
+ bool success = ReadUTFCharLossy(str, begin, length, &ch);
AppendUTF8EscapedValue(ch, output);
return success;
}
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 676468d5d..2cd84b409 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -6,6 +6,7 @@
#include "base/check.h"
#include "base/check_op.h"
+#include "base/metrics/histogram_functions.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -37,25 +38,24 @@ enum CharacterFlags {
// ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these
// characters unescaped, they should just be copied.
UNESCAPE = 4,
-
- // This character is disallowed in URLs. Note that the "special" bit is also
- // set to trigger handling.
- INVALID_BIT = 8,
- INVALID = INVALID_BIT | SPECIAL,
};
// This table contains one of the above flag values. Note some flags are more
// than one bits because they also turn on the "special" flag. Special is the
// only flag that may be combined with others.
//
-// This table is designed to match exactly what IE does with the characters.
+// This table was used to be designed to match exactly what IE did with the
+// characters, however, which doesn't comply with the URL Standard as of Jun
+// 2023. See http://crbug.com/1400251 and http://crbug.com/1252531 for efforts
+// to comply with the URL Standard.
//
// Dot is even more special, and the escaped version is handled specially by
// IsDot. Therefore, we don't need the "escape" flag, and even the "unescape"
// bit is never handled (we just need the "special") bit.
+// clang-format off
const unsigned char kPathCharLookup[0x100] = {
// NULL control chars...
- INVALID, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
+ ESCAPE , ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
// control chars...
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
// ' ' ! " # $ % & ' ( ) * + , - . /
@@ -79,6 +79,7 @@ const unsigned char kPathCharLookup[0x100] = {
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE};
+// clang-format on
enum DotDisposition {
// The given dot is just part of a filename and is not special.
@@ -266,6 +267,7 @@ bool DoPartialPathInternal(const CHAR* spec,
absl::optional<size_t> last_invalid_percent_index;
bool success = true;
+ bool unescape_escaped_char = false;
for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (sizeof(CHAR) > 1 && uch >= 0x80) {
@@ -333,6 +335,8 @@ bool DoPartialPathInternal(const CHAR* spec,
if (unescaped_flags & UNESCAPE) {
// This escaped value shouldn't be escaped. Try to copy it.
+ unescape_escaped_char = true;
+
output->push_back(unescaped_value);
// If we just unescaped a value within 2 output characters of the
// '%' from a previously-detected invalid escape sequence, we
@@ -354,8 +358,6 @@ bool DoPartialPathInternal(const CHAR* spec,
output->push_back('%');
output->push_back(static_cast<char>(spec[i - 1]));
output->push_back(static_cast<char>(spec[i]));
- if (unescaped_flags & INVALID_BIT)
- success = false;
}
} else {
// Invalid escape sequence. IE7+ rejects any URLs with such
@@ -366,12 +368,6 @@ bool DoPartialPathInternal(const CHAR* spec,
last_invalid_percent_index = output->length();
output->push_back('%');
}
-
- } else if (flags & INVALID_BIT) {
- // For NULLs, etc. fail.
- AppendEscapedChar(out_ch, output);
- success = false;
-
} else if (flags & ESCAPE_BIT) {
// This character should be escaped.
AppendEscapedChar(out_ch, output);
@@ -382,6 +378,8 @@ bool DoPartialPathInternal(const CHAR* spec,
}
}
}
+ base::UmaHistogramBoolean("URL.Path.UnescapeEscapedChar",
+ unescape_escaped_char);
return success;
}
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index dee00d86e..0fe0cb34f 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -10,6 +10,7 @@
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/gtest_util.h"
+#include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_feature_list.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
@@ -335,13 +336,10 @@ TEST_P(URLCanonHostTest, Host) {
L"bar.com",
"www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
// Invalid unicode characters should fail...
- // ...In wide input, ICU will barf and we'll end up with the input as
- // escaped UTF-8 (the invalid character should be replaced with the
- // replacement character).
- {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com",
+ {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%B7%90zyx.com",
Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
// ...This is the same as previous but with with escaped.
- {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com",
+ {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%B7%90zyx.com",
Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
// Test name prepping, fullwidth input should be converted to ASCII and
// NOT
@@ -1083,7 +1081,7 @@ TEST(URLCanonTest, CanonicalizeHostSubstring) {
test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(),
Component(0, 8), &output));
output.Complete();
- EXPECT_EQ("%EF%BF%BDzyx.com", out_str);
+ EXPECT_EQ("%EF%B7%90zyx.com", out_str);
}
// Should return true for empty input strings.
@@ -1282,8 +1280,8 @@ DualComponentCase kCommonPathCases[] = {
// Funny characters that are unescaped should be escaped
{"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
{nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
- // Invalid characters that are escaped should cause a failure.
- {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
+ // %00 should not cause failures.
+ {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), true},
// Some characters should be passed through unchanged regardless of esc.
{"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
true},
@@ -1325,10 +1323,9 @@ DualComponentCase kCommonPathCases[] = {
{"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
Component(0, 37), true},
- // Invalid unicode characters should fail. We only do validation on
- // UTF-16 input, so this doesn't happen on 8-bit.
+ // Unicode Noncharacter (U+FDD0) should not fail.
{"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
- {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+ {nullptr, L"/\xfdd0zyx", "/%EF%B7%90zyx", Component(0, 13), true},
};
typedef bool (*CanonFunc8Bit)(const char*,
@@ -1390,7 +1387,7 @@ TEST(URLCanonTest, Path) {
CanonicalizePath);
// Manual test: embedded NULLs should be escaped and the URL should be marked
- // as invalid.
+ // as valid.
const char path_with_null[] = "/ab\0c";
Component in_comp(0, 5);
Component out_comp;
@@ -1399,7 +1396,7 @@ TEST(URLCanonTest, Path) {
StdStringCanonOutput output(&out_str);
bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp);
output.Complete();
- EXPECT_FALSE(success);
+ EXPECT_TRUE(success);
EXPECT_EQ("/ab%00c", out_str);
}
@@ -1507,7 +1504,7 @@ TEST(URLCanonTest, Ref) {
{"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true},
{nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true},
// Test a Unicode invalid character.
- {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true},
+ {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%B7%90", Component(1, 10), true},
// Refs can have # signs and we should preserve them.
{"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true},
{"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
@@ -2129,9 +2126,9 @@ TEST(URLCanonTest, CanonicalizePathURL) {
{"JavaScript:Foo", "javascript:Foo"},
{"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
- // Validation errors should not cause failure. See
+ // Unicode invalid characters should not cause failure. See
// https://crbug.com/925614.
- {"javascript:\uFFFF", "javascript:%EF%BF%BD"},
+ {"javascript:\uFFFF", "javascript:%EF%BF%BF"},
};
for (size_t i = 0; i < std::size(path_cases); i++) {
@@ -2169,7 +2166,7 @@ TEST(URLCanonTest, CanonicalizePathURLPath) {
{"Foo", L"Foo", "Foo"},
{"\":This /is interesting;?#", L"\":This /is interesting;?#",
"\":This /is interesting;?#"},
- {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+ {"\uFFFF", L"\uFFFF", "%EF%BF%BF"},
};
for (size_t i = 0; i < std::size(path_cases); i++) {
@@ -2745,4 +2742,27 @@ TEST(URLCanonTest, IDNToASCII) {
output.set_length(0);
}
+TEST(URLCanonTest, UnescapePathCharHistogram) {
+ struct TestCase {
+ base::StringPiece path;
+ base::HistogramBase::Count cnt;
+ } cases[] = {
+ {"/a", 0},
+ {"/%61", 1},
+ {"/%61%61", 1},
+ };
+
+ for (const auto& c : cases) {
+ base::HistogramTester histogram_tester;
+ Component in_comp(0, c.path.size());
+ Component out_comp;
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ bool success = CanonicalizePath(c.path.data(), in_comp, &output, &out_comp);
+ ASSERT_TRUE(success);
+ histogram_tester.ExpectBucketCount("URL.Path.UnescapeEscapedChar", 1,
+ c.cnt);
+ }
+}
+
} // namespace url
diff --git a/url/url_features.cc b/url/url_features.cc
index 8f38ff257..858e82790 100644
--- a/url/url_features.cc
+++ b/url/url_features.cc
@@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "url/url_features.h"
+#include "base/feature_list.h"
namespace url {
@@ -17,7 +18,7 @@ BASE_FEATURE(kRecordIDNA2008Metrics,
BASE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing,
"StrictIPv4EmbeddedIPv6AddressParsing",
- base::FEATURE_DISABLED_BY_DEFAULT);
+ base::FEATURE_ENABLED_BY_DEFAULT);
// Kill switch for crbug.com/1220361.
BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
@@ -25,6 +26,13 @@ BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
base::FEATURE_ENABLED_BY_DEFAULT);
bool IsUsingIDNA2008NonTransitional() {
+ // If the FeatureList isn't available yet, fall back to the feature's default
+ // state. This may happen during early startup, see crbug.com/1441956.
+ if (!base::FeatureList::GetInstance()) {
+ return kUseIDNA2008NonTransitional.default_state ==
+ base::FEATURE_ENABLED_BY_DEFAULT;
+ }
+
return base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
}
diff --git a/url/url_util.cc b/url/url_util.cc
index 67913eb72..001c50e72 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -885,8 +885,8 @@ void DecodeURLEscapeSequences(const char* input,
// character.
size_t next_character = i;
base_icu::UChar32 code_point;
- if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
- &code_point)) {
+ if (ReadUTFCharLossy(unescaped_chars.data(), &next_character,
+ unescaped_length, &code_point)) {
// Valid UTF-8 character, convert to UTF-16.
AppendUTF16Value(code_point, output);
i = next_character;
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index e1d7801b0..3a2e6e6d5 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -247,6 +247,13 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
{"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
"pqrstuvwxyz{|}~\x7f/"},
{"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+ // U+FFFF (Noncharacter) should not be replaced with U+FFFD (Replacement
+ // Character) (http://crbug.com/1416021)
+ {"%ef%bf%bf", "\xef\xbf\xbf"},
+ // U+FDD0 (Noncharacter)
+ {"%ef%b7%90", "\xef\xb7\x90"},
+ // U+FFFD (Replacement Character)
+ {"%ef%bf%bd", "\xef\xbf\xbd"},
};
for (size_t i = 0; i < std::size(decode_cases); i++) {