summaryrefslogtreecommitdiff
path: root/url
diff options
context:
space:
mode:
authorCronet Mainline Eng <cronet-mainline-eng+copybara@google.com>2024-01-02 11:58:25 +0000
committerMohannad Farrag <aymanm@google.com>2024-01-02 12:02:18 +0000
commita593a16fd9fcd0dd4906673341bc921abb285b97 (patch)
tree6bca400c3096478188c12c7bf183d8652e8c8591 /url
parentec3a8e8db24bb3ce4b078106b358ca1c4389c14f (diff)
downloadcronet-a593a16fd9fcd0dd4906673341bc921abb285b97.tar.gz
Import Cronet version 121.0.6103.2
FolderOrigin-RevId: /tmp/copybara-origin/src Change-Id: I690becfaba7ad4293eba08b4f9d1aa7f953fce20
Diffstat (limited to 'url')
-rw-r--r--url/BUILD.gn310
-rw-r--r--url/android/gurl_android.cc42
-rw-r--r--url/android/gurl_android.h3
-rw-r--r--url/android/gurl_test_init.cc27
-rw-r--r--url/android/gurl_test_init.h11
-rw-r--r--url/android/java/src/org/chromium/url/GURL.java55
-rw-r--r--url/android/java/src/org/chromium/url/IDNStringUtil.java4
-rw-r--r--url/android/java/src/org/chromium/url/Origin.java6
-rw-r--r--url/android/java/src/org/chromium/url/Parsed.java6
-rw-r--r--url/android/javatests/src/org/chromium/url/GURLJavaTest.java135
-rw-r--r--url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java6
-rw-r--r--url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java73
-rw-r--r--url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java4
-rw-r--r--url/android/junit/src/org/chromium/url/ShadowGURLTest.java70
-rw-r--r--url/android/origin_android.cc61
-rw-r--r--url/android/origin_java_test_helper.cc3
-rw-r--r--url/android/parsed_android.cc2
-rw-r--r--url/android/parsed_android.h2
-rw-r--r--url/android/robolectric_jni_onload.cc (renamed from url/android/robolectric_test_main.cc)6
-rw-r--r--url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java192
-rw-r--r--url/android/test/java/src/org/chromium/url/ShadowGURL.java62
-rw-r--r--url/gurl.cc46
-rw-r--r--url/gurl.h66
-rw-r--r--url/gurl_abstract_tests.h4
-rw-r--r--url/gurl_fuzzer.cc10
-rw-r--r--url/gurl_unittest.cc6
-rw-r--r--url/ipc/BUILD.gn1
-rw-r--r--url/mojom/BUILD.gn12
-rw-r--r--url/mojom/origin_mojom_traits.cc4
-rw-r--r--url/mojom/scheme_host_port_mojom_traits.cc5
-rw-r--r--url/mojom/url_gurl_mojom_traits.cc12
-rw-r--r--url/mojom/url_gurl_mojom_traits.h5
-rw-r--r--url/origin.cc17
-rw-r--r--url/origin.h43
-rw-r--r--url/origin_abstract_tests.cc6
-rw-r--r--url/origin_abstract_tests.h16
-rw-r--r--url/origin_unittest.cc8
-rw-r--r--url/scheme_host_port.cc22
-rw-r--r--url/scheme_host_port.h10
-rw-r--r--url/url_canon.h11
-rw-r--r--url/url_canon_filesystemurl.cc4
-rw-r--r--url/url_canon_fileurl.cc9
-rw-r--r--url/url_canon_host.cc49
-rw-r--r--url/url_canon_internal.cc5
-rw-r--r--url/url_canon_internal.h13
-rw-r--r--url/url_canon_ip.cc4
-rw-r--r--url/url_canon_mailtourl.cc2
-rw-r--r--url/url_canon_path.cc149
-rw-r--r--url/url_canon_stdstring.h30
-rw-r--r--url/url_canon_unittest.cc1157
-rw-r--r--url/url_features.cc20
-rw-r--r--url/url_features.h13
-rw-r--r--url/url_idna_icu.cc8
-rw-r--r--url/url_idna_icu_alternatives_android.cc7
-rw-r--r--url/url_idna_icu_alternatives_ios.mm8
-rw-r--r--url/url_parse_perftest.cc11
-rw-r--r--url/url_parse_unittest.cc403
-rw-r--r--url/url_util.cc63
-rw-r--r--url/url_util.h28
-rw-r--r--url/url_util_unittest.cc180
60 files changed, 1736 insertions, 1811 deletions
diff --git a/url/BUILD.gn b/url/BUILD.gn
index 8f5fea125..c525c1669 100644
--- a/url/BUILD.gn
+++ b/url/BUILD.gn
@@ -7,8 +7,8 @@ import("//testing/libfuzzer/fuzzer_test.gni")
import("//testing/test.gni")
import("features.gni")
-import("//build/config/android/jni.gni")
import("//build/config/cronet/config.gni")
+import("//third_party/jni_zero/jni_zero.gni")
if (is_android || is_robolectric) {
import("//build/config/android/rules.gni")
@@ -62,10 +62,33 @@ component("url") {
defines = [ "IS_URL_IMPL" ]
- public_deps = [ "//base" ]
-
deps = [ "//base/third_party/dynamic_annotations" ]
+ public_deps = [
+ "//base",
+ "//build:robolectric_buildflags",
+ ]
+
+ configs += [ "//build/config/compiler:wexit_time_destructors" ]
+
+ if (is_android || is_robolectric) {
+ deps += [ ":url_jni_headers" ]
+ if (!is_cronet_build) {
+ sources += [
+ "android/gurl_android.cc",
+ "android/gurl_android.h",
+ "android/origin_android.cc",
+ "android/parsed_android.cc",
+ "android/parsed_android.h",
+ ]
+ }
+ }
+
+ if (is_robolectric) {
+ # Make jni.h available.
+ public_configs = [ "//third_party/jdk" ]
+ }
+
if (is_win) {
# Don't conflict with Windows' "url.dll".
output_name = "url_lib"
@@ -75,13 +98,7 @@ component("url") {
if (use_platform_icu_alternatives) {
if (is_android) {
sources += [ "url_idna_icu_alternatives_android.cc" ]
- deps += [
- ":buildflags",
- ":url_java",
- ":url_jni_headers",
- "//base",
- "//base/third_party/dynamic_annotations",
- ]
+ deps += [ ":buildflags" ]
} else if (is_ios) {
sources += [ "url_idna_icu_alternatives_ios.mm" ]
} else {
@@ -105,87 +122,43 @@ component("url") {
if (is_android || is_robolectric) {
generate_jni("url_jni_headers") {
sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
- }
-
- generate_jni("origin_jni_headers") {
- sources = [ "android/java/src/org/chromium/url/Origin.java" ]
- }
-
- generate_jni("gurl_jni_headers") {
- sources = [
- "android/java/src/org/chromium/url/GURL.java",
- "android/java/src/org/chromium/url/Parsed.java",
- ]
- }
-
- source_set("gurl_android") {
- sources = [
- "android/gurl_android.cc",
- "android/gurl_android.h",
- "android/parsed_android.cc",
- "android/parsed_android.h",
- ]
-
- deps = [
- ":gurl_jni_headers",
- ":url",
- "//base:base",
- ]
-
- if (is_robolectric) {
- # Make jni.h available.
- configs += [ "//third_party/jdk" ]
+ if (!is_cronet_build) {
+ sources += [
+ "android/java/src/org/chromium/url/GURL.java",
+ "android/java/src/org/chromium/url/Origin.java",
+ "android/java/src/org/chromium/url/Parsed.java",
+ ]
}
}
-
- static_library("origin_android") {
- sources = [ "android/origin_android.cc" ]
-
- deps = [
- ":gurl_android",
- ":origin_jni_headers",
- ":url",
- "//base",
- ]
- }
}
+if (is_android && current_toolchain == default_toolchain) {
+ # TODO(agrieve): Remove alias once usages are removed.
+ java_group("gurl_java") {
+ deps = [ ":url_java" ]
+ }
-if (is_android) {
android_library("url_java") {
sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
- deps = [ "//base:jni_java" ]
- }
-}
-
-if (is_android && !is_cronet_build) {
- android_library("gurl_java") {
- srcjar_deps = [ ":gurl_jni_headers" ]
- sources = [
- "android/java/src/org/chromium/url/GURL.java",
- "android/java/src/org/chromium/url/Parsed.java",
- "android/java/src/org/chromium/url/URI.java",
- ]
- deps = [
- "//base:base_java",
- "//base:jni_java",
- "//build/android:build_java",
- "//third_party/android_deps:com_google_errorprone_error_prone_annotations_java",
- "//third_party/androidx:androidx_annotation_annotation_java",
- "//url/mojom:url_mojom_gurl_java",
- ]
- }
-
- android_library("origin_java") {
- srcjar_deps = [ ":origin_jni_headers" ]
- sources = [ "android/java/src/org/chromium/url/Origin.java" ]
- deps = [
- ":gurl_java",
- "//base:jni_java",
- "//build/android:build_java",
- "//mojo/public/java:bindings_java",
- "//mojo/public/mojom/base:base_java",
- "//url/mojom:url_mojom_origin_java",
- ]
+ srcjar_deps = [ ":url_jni_headers" ]
+ deps = [ "//third_party/jni_zero:jni_zero_java" ]
+ if (!is_cronet_build) {
+ sources += [
+ "android/java/src/org/chromium/url/GURL.java",
+ "android/java/src/org/chromium/url/Origin.java",
+ "android/java/src/org/chromium/url/Parsed.java",
+ "android/java/src/org/chromium/url/URI.java",
+ ]
+ deps += [
+ "//base:base_java",
+ "//build/android:build_java",
+ "//mojo/public/java:bindings_java",
+ "//mojo/public/mojom/base:base_java",
+ "//third_party/android_deps:com_google_errorprone_error_prone_annotations_java",
+ "//third_party/androidx:androidx_annotation_annotation_java",
+ "//url/mojom:url_mojom_gurl_java",
+ "//url/mojom:url_mojom_origin_java",
+ ]
+ }
}
}
@@ -299,95 +272,69 @@ if (is_android && !is_cronet_build) {
"android/origin_java_test_helper.cc",
]
deps = [
- ":gurl_android",
- ":j_test_jni_headers",
- ":origin_android",
+ ":j_test_jni_headers($default_toolchain)",
":url",
"//base/test:test_support",
]
}
- android_library("android_test_helper_java") {
- testonly = true
-
- srcjar_deps = [ ":j_test_jni_headers" ]
- sources = [
- "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
- "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
- ]
- deps = [
- ":gurl_java",
- ":origin_java",
- "//base:base_java_test_support",
- "//base:jni_java",
- ]
- }
-
- # Targets depending on gurl_junit_test_support do not need to bypass platform
- # checks.
- android_library("gurl_junit_test_support") {
- testonly = true
- sources = [ "android/test/java/src/org/chromium/url/JUnitTestGURLs.java" ]
- deps = [ ":gurl_java" ]
- }
+ if (current_toolchain == default_toolchain) {
+ android_library("android_test_helper_java") {
+ testonly = true
- # Unlike gurl_junit_test_support targets depending on gurl_junit_shadows must
- # bypass platform checks.
- robolectric_library("gurl_junit_shadows") {
- sources = [ "android/test/java/src/org/chromium/url/ShadowGURL.java" ]
- deps = [
- ":gurl_java",
- ":gurl_junit_test_support",
- ]
- }
+ srcjar_deps = [ ":j_test_jni_headers" ]
+ sources = [
+ "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
+ "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
+ ]
+ deps = [
+ ":url_java",
+ "//base:base_java_test_support",
+ "//third_party/jni_zero:jni_zero_java",
+ ]
+ }
- android_library("url_java_unit_tests") {
- testonly = true
- sources = [
- "android/javatests/src/org/chromium/url/GURLJavaTest.java",
- "android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java",
- "android/javatests/src/org/chromium/url/OriginJavaTest.java",
- ]
- deps = [
- ":android_test_helper_java",
- ":gurl_java",
- ":gurl_junit_test_support",
- ":origin_java",
- "//base:base_java",
- "//base:base_java_test_support",
- "//base:jni_java",
- "//content/public/test/android:content_java_test_support",
- "//mojo/public/mojom/base:base_java",
- "//third_party/androidx:androidx_core_core_java",
- "//third_party/androidx:androidx_test_runner_java",
- "//third_party/junit",
- "//third_party/mockito:mockito_java",
- "//url/mojom:url_mojom_gurl_java",
- "//url/mojom:url_mojom_origin_java",
- ]
- }
+ # Targets depending on gurl_junit_test_support do not need to bypass platform
+ # checks.
+ android_library("gurl_junit_test_support") {
+ testonly = true
+ sources = [ "android/test/java/src/org/chromium/url/JUnitTestGURLs.java" ]
+ deps = [ ":url_java" ]
+ }
- # See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we
- # can't put 'java' in the name here.
- generate_jni("j_test_jni_headers") {
- testonly = true
- sources = [
- "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
- "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
- ]
- }
+ android_library("url_java_unit_tests") {
+ testonly = true
+ sources = [
+ "android/javatests/src/org/chromium/url/GURLJavaTest.java",
+ "android/javatests/src/org/chromium/url/OriginJavaTest.java",
+ ]
+ deps = [
+ ":android_test_helper_java",
+ ":gurl_junit_test_support",
+ ":url_java",
+ "//base:base_java",
+ "//base:base_java_test_support",
+ "//content/public/test/android:content_java_test_support",
+ "//mojo/public/mojom/base:base_java",
+ "//third_party/androidx:androidx_core_core_java",
+ "//third_party/androidx:androidx_test_runner_java",
+ "//third_party/jni_zero:jni_zero_java",
+ "//third_party/junit",
+ "//third_party/mockito:mockito_java",
+ "//url/mojom:url_mojom_gurl_java",
+ "//url/mojom:url_mojom_origin_java",
+ ]
+ }
- robolectric_library("gurl_junit_tests") {
- sources = [ "android/junit/src/org/chromium/url/ShadowGURLTest.java" ]
- deps = [
- ":gurl_java",
- ":gurl_junit_shadows",
- ":gurl_junit_test_support",
- "//base:base_java_test_support",
- "//base:base_junit_test_support",
- "//base/test:test_support_java",
- "//third_party/junit",
- ]
+ # See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we
+ # can't put 'java' in the name here.
+ generate_jni("j_test_jni_headers") {
+ testonly = true
+ sources = [
+ "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
+ "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
+ ]
+ }
}
}
@@ -397,15 +344,34 @@ if (!is_cronet_build && target_os == "android") {
shared_library_with_jni("libgurl_robolectric") {
testonly = true
enable_target = is_robolectric
- sources = [ "android/robolectric_test_main.cc" ]
- deps = [
- "//base",
- "//url:gurl_android",
+ java_targets = [
+ ":gurl_java",
+ ":url_java",
]
+ deps = [ ":robolectric_jni_onload($robolectric_toolchain)" ]
+ }
+ if (is_robolectric) {
+ # Depend on this if you need to write a custom JNI_OnLoad()
+ source_set("gurl_test_init") {
+ testonly = true
+ sources = [
+ "android/gurl_test_init.cc",
+ "android/gurl_test_init.h",
+ ]
+ deps = [ ":url" ]
+ }
- # Make jni.h available.
- configs += [ "//third_party/jdk" ]
+ # Depend on this if you do not need to write a custom JNI_OnLoad()
+ source_set("robolectric_jni_onload") {
+ testonly = true
+ sources = [ "android/robolectric_jni_onload.cc" ]
+ deps = [
+ ":gurl_test_init",
+ "//base",
+ ]
- java_targets = [ "//chrome/android:chrome_junit_tests" ]
+ # Make jni.h available.
+ configs += [ "//third_party/jdk" ]
+ }
}
}
diff --git a/url/android/gurl_android.cc b/url/android/gurl_android.cc
index bf398a132..8de79bbcd 100644
--- a/url/android/gurl_android.cc
+++ b/url/android/gurl_android.cc
@@ -15,9 +15,10 @@
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/memory/ptr_util.h"
+#include "base/strings/string_util.h"
#include "url/android/parsed_android.h"
-#include "url/gurl_jni_headers/GURL_jni.h"
#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_jni_headers/GURL_jni.h"
using base::android::AttachCurrentThread;
using base::android::JavaParamRef;
@@ -49,6 +50,10 @@ static std::unique_ptr<GURL> FromJavaGURL(JNIEnv* env,
static void InitFromGURL(JNIEnv* env,
const GURL& gurl,
const JavaRef<jobject>& target) {
+ // Ensure that the spec only contains US-ASCII (single-byte characters) or the
+ // parsed indices will be wrong as the indices are in bytes while Java Strings
+ // are always 16-bit.
+ DCHECK(base::IsStringASCII(gurl.possibly_invalid_spec()));
Java_GURL_init(
env, target,
base::android::ConvertUTF8ToJavaString(env, gurl.possibly_invalid_spec()),
@@ -157,4 +162,39 @@ static jlong JNI_GURL_CreateNative(JNIEnv* env,
FromJavaGURL(env, j_spec, is_valid, parsed_ptr).release());
}
+static void JNI_GURL_ReplaceComponents(
+ JNIEnv* env,
+ const JavaParamRef<jstring>& j_spec,
+ jboolean is_valid,
+ jlong parsed_ptr,
+ const JavaParamRef<jstring>& j_username_replacement,
+ jboolean clear_username,
+ const JavaParamRef<jstring>& j_password_replacement,
+ jboolean clear_password,
+ const JavaParamRef<jobject>& j_result) {
+ GURL::Replacements replacements;
+
+ // Replacement strings must remain in scope for ReplaceComponents().
+ std::string username;
+ std::string password;
+
+ if (clear_username) {
+ replacements.ClearUsername();
+ } else if (j_username_replacement) {
+ username = ConvertJavaStringToUTF8(env, j_username_replacement);
+ replacements.SetUsernameStr(username);
+ }
+
+ if (clear_password) {
+ replacements.ClearPassword();
+ } else if (j_password_replacement) {
+ password = ConvertJavaStringToUTF8(env, j_password_replacement);
+ replacements.SetPasswordStr(password);
+ }
+
+ std::unique_ptr<GURL> original =
+ FromJavaGURL(env, j_spec, is_valid, parsed_ptr);
+ InitFromGURL(env, original->ReplaceComponents(replacements), j_result);
+}
+
} // namespace url
diff --git a/url/android/gurl_android.h b/url/android/gurl_android.h
index 8b356070d..98cceaf0f 100644
--- a/url/android/gurl_android.h
+++ b/url/android/gurl_android.h
@@ -8,12 +8,13 @@
#include <memory>
#include "base/android/scoped_java_ref.h"
+#include "base/component_export.h"
#include "base/containers/span.h"
#include "url/gurl.h"
namespace url {
-class GURLAndroid {
+class COMPONENT_EXPORT(URL) GURLAndroid {
public:
static std::unique_ptr<GURL> ToNativeGURL(
JNIEnv* env,
diff --git a/url/android/gurl_test_init.cc b/url/android/gurl_test_init.cc
new file mode 100644
index 000000000..94a66acdd
--- /dev/null
+++ b/url/android/gurl_test_init.cc
@@ -0,0 +1,27 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/android/gurl_test_init.h"
+#include "url/url_util.h"
+
+namespace url {
+// Registers enough to have //url parsing work as expected.
+// Does not directly reference //content or //chrome to save on compile times.
+void RegisterSchemesForRobolectric() {
+ // Schemes from content/common/url_schemes.cc:
+ url::AddStandardScheme("chrome", SCHEME_WITH_HOST);
+ url::AddStandardScheme("chrome-untrusted", SCHEME_WITH_HOST);
+ url::AddStandardScheme("chrome-error", SCHEME_WITH_HOST);
+ url::AddNoAccessScheme("chrome-error");
+
+ // Schemes from chrome/common/chrome_content_client.cc:
+ url::AddStandardScheme("isolated-app", SCHEME_WITH_HOST);
+ url::AddStandardScheme("chrome-native", SCHEME_WITH_HOST);
+ url::AddNoAccessScheme("chrome-native");
+ url::AddStandardScheme("chrome-search", SCHEME_WITH_HOST);
+ url::AddStandardScheme("chrome-distiller", SCHEME_WITH_HOST);
+ url::AddStandardScheme("android-app", SCHEME_WITH_HOST);
+ url::AddLocalScheme("content");
+}
+} // namespace url
diff --git a/url/android/gurl_test_init.h b/url/android/gurl_test_init.h
new file mode 100644
index 000000000..c8f958535
--- /dev/null
+++ b/url/android/gurl_test_init.h
@@ -0,0 +1,11 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ANDROID_GURL_TEST_INIT_H_
+#define URL_ANDROID_GURL_TEST_INIT_H_
+
+namespace url {
+void RegisterSchemesForRobolectric();
+}
+#endif // URL_ANDROID_GURL_TEST_INIT_H_
diff --git a/url/android/java/src/org/chromium/url/GURL.java b/url/android/java/src/org/chromium/url/GURL.java
index 09698e4bb..8aaa4903b 100644
--- a/url/android/java/src/org/chromium/url/GURL.java
+++ b/url/android/java/src/org/chromium/url/GURL.java
@@ -11,11 +11,12 @@ import androidx.annotation.Nullable;
import com.google.errorprone.annotations.DoNotMock;
+import org.jni_zero.CalledByNative;
+import org.jni_zero.JNINamespace;
+import org.jni_zero.NativeMethods;
+
import org.chromium.base.Log;
import org.chromium.base.ThreadUtils;
-import org.chromium.base.annotations.CalledByNative;
-import org.chromium.base.annotations.JNINamespace;
-import org.chromium.base.annotations.NativeMethods;
import org.chromium.base.library_loader.LibraryLoader;
import org.chromium.base.metrics.RecordHistogram;
import org.chromium.base.task.PostTask;
@@ -37,7 +38,7 @@ import java.util.Random;
* reconstruct a GURL in Java, allowing it to be much faster in the common case and easier to use.
*/
@JNINamespace("url")
-@DoNotMock("Create a real instance instead. For Robolectric, see JUnitTestGURLs.java")
+@DoNotMock("Create a real instance instead.")
public class GURL {
private static final String TAG = "GURL";
/* package */ static final int SERIALIZER_VERSION = 1;
@@ -135,8 +136,6 @@ public class GURL {
@CalledByNative
private void init(String spec, boolean isValid, Parsed parsed) {
mSpec = spec;
- // Ensure that the spec only contains US-ASCII or the parsed indices will be wrong.
- assert mSpec.matches("\\A\\p{ASCII}*\\z");
mIsValid = isValid;
mParsed = parsed;
}
@@ -267,6 +266,36 @@ public class GURL {
return getNatives().domainIs(mSpec, mIsValid, mParsed.toNativeParsed(), domain);
}
+ /**
+ * Returns a copy of the URL with components replaced. See native GURL::ReplaceComponents().
+ *
+ * <p>Rules for replacement: 1. If a `clear*` boolean param is true, the component will be
+ * removed from the result. 2. Otherwise if the corresponding string param is non-null, its
+ * value will be used to replace the component. 3. If the string is null and the `clear*`
+ * boolean is false, the component will not be modified.
+ *
+ * @param username Username replacement.
+ * @param clearUsername True if the result should not contain a username.
+ * @param password Password replacement.
+ * @param clearPassword True if the result should not contain a password.
+ * @return Copy of the URL with replacements applied.
+ */
+ public GURL replaceComponents(
+ String username, boolean clearUsername, String password, boolean clearPassword) {
+ GURL result = new GURL();
+ getNatives()
+ .replaceComponents(
+ mSpec,
+ mIsValid,
+ mParsed.toNativeParsed(),
+ username,
+ clearUsername,
+ password,
+ clearPassword,
+ result);
+ return result;
+ }
+
@Override
public final int hashCode() {
return mSpec.hashCode();
@@ -405,5 +434,19 @@ public class GURL {
* Reconstructs the native GURL for this Java GURL, returning its native pointer.
*/
long createNative(String spec, boolean isValid, long nativeParsed);
+
+ /**
+ * Reconstructs the native GURL for this Java GURL and initializes |result| with the result
+ * of ReplaceComponents.
+ */
+ void replaceComponents(
+ String spec,
+ boolean isValid,
+ long nativeParsed,
+ String username,
+ boolean clearUsername,
+ String password,
+ boolean clearPassword,
+ GURL result);
}
}
diff --git a/url/android/java/src/org/chromium/url/IDNStringUtil.java b/url/android/java/src/org/chromium/url/IDNStringUtil.java
index 10957b673..3887c2b38 100644
--- a/url/android/java/src/org/chromium/url/IDNStringUtil.java
+++ b/url/android/java/src/org/chromium/url/IDNStringUtil.java
@@ -4,8 +4,8 @@
package org.chromium.url;
-import org.chromium.base.annotations.CalledByNative;
-import org.chromium.base.annotations.JNINamespace;
+import org.jni_zero.CalledByNative;
+import org.jni_zero.JNINamespace;
import java.net.IDN;
diff --git a/url/android/java/src/org/chromium/url/Origin.java b/url/android/java/src/org/chromium/url/Origin.java
index 87ce87066..9426264a0 100644
--- a/url/android/java/src/org/chromium/url/Origin.java
+++ b/url/android/java/src/org/chromium/url/Origin.java
@@ -4,9 +4,9 @@
package org.chromium.url;
-import org.chromium.base.annotations.CalledByNative;
-import org.chromium.base.annotations.JNINamespace;
-import org.chromium.base.annotations.NativeMethods;
+import org.jni_zero.CalledByNative;
+import org.jni_zero.JNINamespace;
+import org.jni_zero.NativeMethods;
/** An origin is either a (scheme, host, port) tuple or is opaque. */
@JNINamespace("url")
diff --git a/url/android/java/src/org/chromium/url/Parsed.java b/url/android/java/src/org/chromium/url/Parsed.java
index 75d12cb9a..d87e3a9f3 100644
--- a/url/android/java/src/org/chromium/url/Parsed.java
+++ b/url/android/java/src/org/chromium/url/Parsed.java
@@ -4,9 +4,9 @@
package org.chromium.url;
-import org.chromium.base.annotations.CalledByNative;
-import org.chromium.base.annotations.JNINamespace;
-import org.chromium.base.annotations.NativeMethods;
+import org.jni_zero.CalledByNative;
+import org.jni_zero.JNINamespace;
+import org.jni_zero.NativeMethods;
/**
* A java wrapper for Parsed, GURL's internal parsed URI representation.
diff --git a/url/android/javatests/src/org/chromium/url/GURLJavaTest.java b/url/android/javatests/src/org/chromium/url/GURLJavaTest.java
index e684e5103..29d613e20 100644
--- a/url/android/javatests/src/org/chromium/url/GURLJavaTest.java
+++ b/url/android/javatests/src/org/chromium/url/GURLJavaTest.java
@@ -30,8 +30,7 @@ import java.net.URISyntaxException;
@RunWith(BaseJUnit4ClassRunner.class)
@Batch(Batch.UNIT_TESTS)
public class GURLJavaTest {
- @Mock
- GURL.Natives mGURLMocks;
+ @Mock GURL.Natives mGURLMocks;
@Before
public void setUp() {
@@ -159,45 +158,47 @@ public class GURLJavaTest {
@SuppressWarnings(value = "AuthLeak")
public void testSerialization() {
GURL cases[] = {
- // Common Standard URLs.
- new GURL("https://www.google.com"),
- new GURL("https://www.google.com/"),
- new GURL("https://www.google.com/maps.htm"),
- new GURL("https://www.google.com/maps/"),
- new GURL("https://www.google.com/index.html"),
- new GURL("https://www.google.com/index.html?q=maps"),
- new GURL("https://www.google.com/index.html#maps/"),
- new GURL("https://foo:bar@www.google.com/maps.htm"),
- new GURL("https://www.google.com/maps/au/index.html"),
- new GURL("https://www.google.com/maps/au/north"),
- new GURL("https://www.google.com/maps/au/north/"),
- new GURL("https://www.google.com/maps/au/index.html?q=maps#fragment/"),
- new GURL("http://www.google.com:8000/maps/au/index.html?q=maps#fragment/"),
- new GURL("https://www.google.com/maps/au/north/?q=maps#fragment"),
- new GURL("https://www.google.com/maps/au/north?q=maps#fragment"),
- // Less common standard URLs.
- new GURL("filesystem:http://www.google.com/temporary/bar.html?baz=22"),
- new GURL("file:///temporary/bar.html?baz=22"),
- new GURL("ftp://foo/test/index.html"),
- new GURL("gopher://foo/test/index.html"),
- new GURL("ws://foo/test/index.html"),
- // Non-standard,
- new GURL("chrome://foo/bar.html"),
- new GURL("httpa://foo/test/index.html"),
- new GURL("blob:https://foo.bar/test/index.html"),
- new GURL("about:blank"),
- new GURL("data:foobar"),
- new GURL("scheme:opaque_data"),
- // Invalid URLs.
- new GURL("foobar"),
- // URLs containing the delimiter
- new GURL("https://www.google.ca/" + GURL.SERIALIZER_DELIMITER + ",foo"),
- new GURL("https://www.foo" + GURL.SERIALIZER_DELIMITER + "bar.com"),
+ // Common Standard URLs.
+ new GURL("https://www.google.com"),
+ new GURL("https://www.google.com/"),
+ new GURL("https://www.google.com/maps.htm"),
+ new GURL("https://www.google.com/maps/"),
+ new GURL("https://www.google.com/index.html"),
+ new GURL("https://www.google.com/index.html?q=maps"),
+ new GURL("https://www.google.com/index.html#maps/"),
+ new GURL("https://foo:bar@www.google.com/maps.htm"),
+ new GURL("https://www.google.com/maps/au/index.html"),
+ new GURL("https://www.google.com/maps/au/north"),
+ new GURL("https://www.google.com/maps/au/north/"),
+ new GURL("https://www.google.com/maps/au/index.html?q=maps#fragment/"),
+ new GURL("http://www.google.com:8000/maps/au/index.html?q=maps#fragment/"),
+ new GURL("https://www.google.com/maps/au/north/?q=maps#fragment"),
+ new GURL("https://www.google.com/maps/au/north?q=maps#fragment"),
+ // Less common standard URLs.
+ new GURL("filesystem:http://www.google.com/temporary/bar.html?baz=22"),
+ new GURL("file:///temporary/bar.html?baz=22"),
+ new GURL("ftp://foo/test/index.html"),
+ new GURL("gopher://foo/test/index.html"),
+ new GURL("ws://foo/test/index.html"),
+ // Non-standard,
+ new GURL("chrome://foo/bar.html"),
+ new GURL("httpa://foo/test/index.html"),
+ new GURL("blob:https://foo.bar/test/index.html"),
+ new GURL("about:blank"),
+ new GURL("data:foobar"),
+ new GURL("scheme:opaque_data"),
+ // Invalid URLs.
+ new GURL("foobar"),
+ // URLs containing the delimiter
+ new GURL("https://www.google.ca/" + GURL.SERIALIZER_DELIMITER + ",foo"),
+ new GURL("https://www.foo" + GURL.SERIALIZER_DELIMITER + "bar.com"),
};
GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
- doThrow(new RuntimeException("Should not re-initialize for deserialization when the "
- + "version hasn't changed."))
+ doThrow(
+ new RuntimeException(
+ "Should not re-initialize for deserialization when the "
+ + "version hasn't changed."))
.when(mGURLMocks)
.init(any(), any());
for (GURL url : cases) {
@@ -215,36 +216,35 @@ public class GURLJavaTest {
@Test
public void testSerializationWithVersionSkew() {
GURL url = new GURL("https://www.google.com");
- String serialization = (GURL.SERIALIZER_VERSION + 1)
- + ",0,0,0,0,foo,https://url.bad,blah,0,".replace(',', GURL.SERIALIZER_DELIMITER)
- + url.getSpec();
+ String serialization =
+ (GURL.SERIALIZER_VERSION + 1)
+ + ",0,0,0,0,foo,https://url.bad,blah,0,"
+ .replace(',', GURL.SERIALIZER_DELIMITER)
+ + url.getSpec();
serialization = prependLengthToSerialization(serialization);
GURL out = GURL.deserialize(serialization);
deepAssertEquals(url, out);
}
- /**
- * Tests that fields that aren't visible to java code are correctly serialized.
- */
+ /** Tests that fields that aren't visible to java code are correctly serialized. */
@SmallTest
@Test
public void testSerializationOfPrivateFields() {
- String serialization = GURL.SERIALIZER_VERSION
- + ",true,"
- // Outer Parsed.
- + "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,false,true,"
- // Inner Parsed.
- + "17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,true,false,"
- + "chrome://foo/bar.html";
+ String serialization =
+ GURL.SERIALIZER_VERSION
+ + ",true,"
+ // Outer Parsed.
+ + "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,false,true,"
+ // Inner Parsed.
+ + "17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,true,false,"
+ + "chrome://foo/bar.html";
serialization = serialization.replace(',', GURL.SERIALIZER_DELIMITER);
serialization = prependLengthToSerialization(serialization);
GURL url = GURL.deserialize(serialization);
Assert.assertEquals(url.serialize(), serialization);
}
- /**
- * Tests serialized GURL truncated by storage.
- */
+ /** Tests serialized GURL truncated by storage. */
@SmallTest
@Test
public void testTruncatedDeserialization() {
@@ -254,9 +254,7 @@ public class GURLJavaTest {
Assert.assertEquals(url, GURL.emptyGURL());
}
- /**
- * Tests serialized GURL truncated by storage.
- */
+ /** Tests serialized GURL truncated by storage. */
@SmallTest
@Test
public void testCorruptedSerializations() {
@@ -288,6 +286,26 @@ public class GURLJavaTest {
Assert.assertFalse(url1.domainIs("images.google.com"));
}
+ // Test that replaceComponents is hooked up correctly.
+ @SmallTest
+ @Test
+ @SuppressWarnings(value = "AuthLeak")
+ public void testReplaceComponents() {
+ GURL url = new GURL("http://user:pass@google.com:99/foo;bar?q=a#ref");
+
+ GURL unchanged = url.replaceComponents(null, false, null, false);
+ Assert.assertEquals("user", unchanged.getUsername());
+ Assert.assertEquals("pass", unchanged.getPassword());
+
+ GURL cleared = url.replaceComponents(null, true, null, true);
+ Assert.assertTrue(cleared.getUsername().isEmpty());
+ Assert.assertTrue(cleared.getPassword().isEmpty());
+
+ GURL changed = url.replaceComponents("newusername", false, "newpassword", false);
+ Assert.assertEquals("newusername", changed.getUsername());
+ Assert.assertEquals("newpassword", changed.getPassword());
+ }
+
// Tests Mojom conversion.
@SmallTest
@Test
@@ -306,7 +324,8 @@ public class GURLJavaTest {
Assert.assertEquals("", new GURL(new String(new byte[] {1, 1, 1})).toMojom().url);
// Too long.
- Assert.assertEquals("",
+ Assert.assertEquals(
+ "",
new GURL("https://www.google.com/".concat("a".repeat(2 * 1024 * 1024)))
.toMojom()
.url);
diff --git a/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java b/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
index 975b009dc..2999295bd 100644
--- a/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
+++ b/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
@@ -4,9 +4,9 @@
package org.chromium.url;
-import org.chromium.base.annotations.CalledByNative;
-import org.chromium.base.annotations.JNINamespace;
-import org.chromium.base.annotations.NativeMethods;
+import org.jni_zero.CalledByNative;
+import org.jni_zero.JNINamespace;
+import org.jni_zero.NativeMethods;
/**
* Helpers for GURLJavaTest that need to call into native code.
diff --git a/url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java b/url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
deleted file mode 100644
index a23967c24..000000000
--- a/url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2020 The Chromium Authors
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package org.chromium.url;
-
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.doThrow;
-
-import androidx.test.filters.SmallTest;
-
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.mockito.Mock;
-import org.mockito.MockitoAnnotations;
-
-import org.chromium.base.Log;
-import org.chromium.base.test.BaseJUnit4ClassRunner;
-import org.chromium.base.test.util.Batch;
-
-import java.util.Map;
-
-/**
- * Tests for JUnitTestGURLs.
- */
-@RunWith(BaseJUnit4ClassRunner.class)
-@Batch(Batch.UNIT_TESTS)
-public class JUnitTestGURLsTest {
- private static final String TAG = "JUnitTestGURLs";
-
- @Mock
- GURL.Natives mGURLMocks;
-
- @Before
- public void setUp() {
- MockitoAnnotations.initMocks(this);
- }
-
- private RuntimeException getErrorForGURL(GURL gurl) {
- String serialized = gurl.serialize();
- Assert.assertEquals(-1, serialized.indexOf(","));
- serialized = serialized.replace(GURL.SERIALIZER_DELIMITER, ',');
-
- return new RuntimeException("Please update the serialization in JUnitTestGURLs.java for "
- + gurl.getPossiblyInvalidSpec() + " to: '" + serialized + "'");
- }
-
- @SmallTest
- @Test
- public void testGURLEquivalence() throws Throwable {
- doThrow(new RuntimeException("Deserialization required re-initialization."))
- .when(mGURLMocks)
- .init(any(), any());
-
- Throwable exception = null;
- for (Map.Entry<String, String> entry : JUnitTestGURLs.sGURLMap.entrySet()) {
- GURL gurl = new GURL(entry.getKey());
- try {
- GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
- GURL deserialized = JUnitTestGURLs.getGURL(entry.getKey());
- GURLJni.TEST_HOOKS.setInstanceForTesting(null);
- GURLJavaTest.deepAssertEquals(deserialized, gurl);
- } catch (Throwable e) {
- GURLJni.TEST_HOOKS.setInstanceForTesting(null);
- exception = getErrorForGURL(gurl);
- Log.e(TAG, "Error: ", exception);
- }
- }
- if (exception != null) throw exception;
- }
-}
diff --git a/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java b/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
index 2eb9550ba..b4f628a70 100644
--- a/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
+++ b/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
@@ -4,8 +4,8 @@
package org.chromium.url;
-import org.chromium.base.annotations.JNINamespace;
-import org.chromium.base.annotations.NativeMethods;
+import org.jni_zero.JNINamespace;
+import org.jni_zero.NativeMethods;
/**
* Helpers for OriginJavaTest that need to call into native code.
diff --git a/url/android/junit/src/org/chromium/url/ShadowGURLTest.java b/url/android/junit/src/org/chromium/url/ShadowGURLTest.java
deleted file mode 100644
index a491de1a3..000000000
--- a/url/android/junit/src/org/chromium/url/ShadowGURLTest.java
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2021 The Chromium Authors
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package org.chromium.url;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.robolectric.annotation.Config;
-
-import org.chromium.base.test.BaseRobolectricTestRunner;
-
-/** Tests of {@link ShadowGURL}. */
-@RunWith(BaseRobolectricTestRunner.class)
-@Config(shadows = {ShadowGURL.class})
-public class ShadowGURLTest {
- /* package */ static void deepAssertEquals(GURL expected, GURL actual) {
- Assert.assertEquals(expected, actual);
- Assert.assertEquals(expected.getScheme(), actual.getScheme());
- Assert.assertEquals(expected.getUsername(), actual.getUsername());
- Assert.assertEquals(expected.getPassword(), actual.getPassword());
- Assert.assertEquals(expected.getHost(), actual.getHost());
- Assert.assertEquals(expected.getPort(), actual.getPort());
- Assert.assertEquals(expected.getPath(), actual.getPath());
- Assert.assertEquals(expected.getQuery(), actual.getQuery());
- Assert.assertEquals(expected.getRef(), actual.getRef());
- }
-
- @Test
- public void testComponents() {
- GURL url = new GURL(JUnitTestGURLs.SEARCH_URL);
- Assert.assertFalse(url.isEmpty());
- Assert.assertTrue(url.isValid());
-
- Assert.assertEquals(JUnitTestGURLs.SEARCH_URL, url.getSpec());
- Assert.assertEquals("https", url.getScheme());
- Assert.assertEquals("", url.getUsername());
- Assert.assertEquals("", url.getPassword());
- Assert.assertEquals("www.google.com", url.getHost());
- Assert.assertEquals("", url.getPort());
- Assert.assertEquals("/search", url.getPath());
- Assert.assertEquals("q=test", url.getQuery());
- Assert.assertEquals("", url.getRef());
- }
-
- @Test
- public void testEmpty() {
- GURL url = new GURL("");
- Assert.assertFalse(url.isValid());
-
- Assert.assertEquals("", url.getSpec());
- Assert.assertEquals("", url.getScheme());
- Assert.assertEquals("", url.getUsername());
- Assert.assertEquals("", url.getPassword());
- Assert.assertEquals("", url.getHost());
- Assert.assertEquals("", url.getPort());
- Assert.assertEquals("", url.getPath());
- Assert.assertEquals("", url.getQuery());
- Assert.assertEquals("", url.getRef());
- }
-
- @Test
- public void testSerialization() {
- GURL gurl = new GURL(JUnitTestGURLs.URL_1_WITH_PATH);
- GURL deserialized = GURL.deserialize(gurl.serialize());
-
- deepAssertEquals(deserialized, gurl);
- }
-}
diff --git a/url/android/origin_android.cc b/url/android/origin_android.cc
index a75a9a369..b44125ae1 100644
--- a/url/android/origin_android.cc
+++ b/url/android/origin_android.cc
@@ -11,13 +11,22 @@
#include "base/android/scoped_java_ref.h"
#include "base/memory/ptr_util.h"
#include "url/android/gurl_android.h"
-#include "url/origin_jni_headers/Origin_jni.h"
+#include "url/url_jni_headers/Origin_jni.h"
namespace url {
-base::android::ScopedJavaLocalRef<jobject> Origin::CreateJavaObject() const {
+// friend
+Origin CreateOpaqueOriginForAndroid(const std::string& scheme,
+ const std::string& host,
+ uint16_t port,
+ const base::UnguessableToken& nonce_token) {
+ return Origin::CreateOpaqueFromNormalizedPrecursorTuple(
+ scheme, host, port, Origin::Nonce(nonce_token));
+}
+
+base::android::ScopedJavaLocalRef<jobject> Origin::ToJavaObject() const {
JNIEnv* env = base::android::AttachCurrentThread();
- const base::UnguessableToken* token = Origin::GetNonceForSerialization();
+ const base::UnguessableToken* token = GetNonceForSerialization();
return Java_Origin_Constructor(
env, base::android::ConvertUTF8ToJavaString(env, tuple_.scheme()),
base::android::ConvertUTF8ToJavaString(env, tuple_.host()), tuple_.port(),
@@ -34,42 +43,15 @@ Origin Origin::FromJavaObject(
return std::move(*origin);
}
-// static
-jlong Origin::CreateNative(JNIEnv* env,
- const base::android::JavaRef<jstring>& java_scheme,
- const base::android::JavaRef<jstring>& java_host,
- uint16_t port,
- bool is_opaque,
- uint64_t token_high_bits,
- uint64_t token_low_bits) {
- const std::string& scheme = ConvertJavaStringToUTF8(env, java_scheme);
- const std::string& host = ConvertJavaStringToUTF8(env, java_host);
-
- absl::optional<base::UnguessableToken> nonce_token =
- base::UnguessableToken::Deserialize(token_high_bits, token_low_bits);
- bool has_nonce = nonce_token.has_value();
- CHECK(has_nonce == is_opaque);
- Origin::Nonce nonce;
- if (has_nonce) {
- nonce = Origin::Nonce(nonce_token.value());
- }
- Origin origin = is_opaque
- ? Origin::CreateOpaqueFromNormalizedPrecursorTuple(
- scheme, host, port, nonce)
- : Origin::CreateFromNormalizedTuple(scheme, host, port);
- return reinterpret_cast<intptr_t>(new Origin(origin));
-}
-
static base::android::ScopedJavaLocalRef<jobject> JNI_Origin_CreateOpaque(
JNIEnv* env) {
- return Origin().CreateJavaObject();
+ return Origin().ToJavaObject();
}
static base::android::ScopedJavaLocalRef<jobject> JNI_Origin_CreateFromGURL(
JNIEnv* env,
const base::android::JavaParamRef<jobject>& j_gurl) {
- return Origin::Create(*GURLAndroid::ToNativeGURL(env, j_gurl))
- .CreateJavaObject();
+ return Origin::Create(*GURLAndroid::ToNativeGURL(env, j_gurl)).ToJavaObject();
}
static jlong JNI_Origin_CreateNative(
@@ -80,8 +62,19 @@ static jlong JNI_Origin_CreateNative(
jboolean is_opaque,
jlong token_high_bits,
jlong token_low_bits) {
- return Origin::CreateNative(env, java_scheme, java_host, port, is_opaque,
- token_high_bits, token_low_bits);
+ const std::string& scheme = ConvertJavaStringToUTF8(env, java_scheme);
+ const std::string& host = ConvertJavaStringToUTF8(env, java_host);
+
+ Origin origin;
+ if (is_opaque) {
+ absl::optional<base::UnguessableToken> nonce_token =
+ base::UnguessableToken::Deserialize(token_high_bits, token_low_bits);
+ origin =
+ CreateOpaqueOriginForAndroid(scheme, host, port, nonce_token.value());
+ } else {
+ origin = Origin::CreateFromNormalizedTuple(scheme, host, port);
+ }
+ return reinterpret_cast<intptr_t>(new Origin(origin));
}
} // namespace url
diff --git a/url/android/origin_java_test_helper.cc b/url/android/origin_java_test_helper.cc
index 62554d87d..3815d9764 100644
--- a/url/android/origin_java_test_helper.cc
+++ b/url/android/origin_java_test_helper.cc
@@ -21,8 +21,7 @@ static void JNI_OriginJavaTestHelper_TestOriginEquivalence(JNIEnv* env) {
Origin::Create(GURL("http://a.com:8000")).DeriveNewOpaqueOrigin(),
};
for (const Origin& origin : cases) {
- base::android::ScopedJavaLocalRef<jobject> j_origin =
- origin.CreateJavaObject();
+ base::android::ScopedJavaLocalRef<jobject> j_origin = origin.ToJavaObject();
Origin sameOrigin = Origin::FromJavaObject(j_origin);
if (origin != sameOrigin) {
std::stringstream ss;
diff --git a/url/android/parsed_android.cc b/url/android/parsed_android.cc
index 36d8aa255..e3b0d242a 100644
--- a/url/android/parsed_android.cc
+++ b/url/android/parsed_android.cc
@@ -7,7 +7,7 @@
#include <jni.h>
#include "base/android/jni_android.h"
-#include "url/gurl_jni_headers/Parsed_jni.h"
+#include "url/url_jni_headers/Parsed_jni.h"
using base::android::AttachCurrentThread;
using base::android::JavaRef;
diff --git a/url/android/parsed_android.h b/url/android/parsed_android.h
index 244ada55f..81daa4386 100644
--- a/url/android/parsed_android.h
+++ b/url/android/parsed_android.h
@@ -6,12 +6,14 @@
#define URL_ANDROID_PARSED_ANDROID_H_
#include "base/android/scoped_java_ref.h"
+#include "base/component_export.h"
#include "url/third_party/mozilla/url_parse.h"
namespace url {
class ParsedAndroid {
public:
+ COMPONENT_EXPORT(URL)
static base::android::ScopedJavaLocalRef<jobject> InitFromParsed(
JNIEnv* env,
const Parsed& parsed);
diff --git a/url/android/robolectric_test_main.cc b/url/android/robolectric_jni_onload.cc
index 28fb4d241..d6fd0ee27 100644
--- a/url/android/robolectric_test_main.cc
+++ b/url/android/robolectric_jni_onload.cc
@@ -1,15 +1,15 @@
-// Copyright 2022 The Chromium Authors
+// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <jni.h>
#include "base/android/base_jni_onload.h"
#include "base/android/jni_android.h"
+#include "url/android/gurl_test_init.h"
extern "C" JNI_EXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) {
base::android::InitVM(vm);
base::android::OnJNIOnLoadInit();
- // TODO(1223993): Initialize GURL schemes, like in
- // content::RegisterContentSchemes().
+ url::RegisterSchemesForRobolectric();
return JNI_VERSION_1_4;
}
diff --git a/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java b/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
index 9f19c6c90..2eba46068 100644
--- a/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
+++ b/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
@@ -4,171 +4,35 @@
package org.chromium.url;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
/**
- * A Helper class for JUnit tests to be able to use GURLs without requiring native initialization.
- * This should be used sparingly, when converting junit tests to Batched Instrumentation tests is
- * not feasible.
- *
- * If any more complex GURL behaviour is tested, like comparing Origins, the test should be written
- * as an Instrumentation test instead - you should never mock GURL.
+ * A collection of test GURLs.
*/
public class JUnitTestGURLs {
- // In order to add a test URL:
- // 1. Add the URL String as a constant here.
- // 2. Add the constant to the map below, with a placeholder string for the GURL serialization.
- // 3. Run JUnitTestGURLsTest (eg. './tools/autotest.py -C out/Debug JUnitTestGURLsTest').
- // 4. Check logcat output or test exception for the correct serialization String, and place it
- // in the map.
- public static final String EXAMPLE_URL = "https://www.example.com/";
- public static final String HTTP_URL = "http://www.example.com/";
- public static final String URL_1 = "https://www.one.com/";
- public static final String URL_1_NUMERAL = "https://www.1.com/";
- public static final String URL_1_WITH_PATH = "https://www.one.com/some_path.html";
- public static final String URL_2 = "https://www.two.com/";
- public static final String URL_3 = "https://www.three.com/";
- public static final String MAPS_URL = "https://maps.google.com/";
- public static final String SEARCH_URL = "https://www.google.com/search?q=test";
- public static final String SEARCH_2_URL = "https://www.google.com/search?q=query";
- public static final String INITIAL_URL = "https://initial.com";
- public static final String SPECULATED_URL = "https://speculated.com";
- public static final String NTP_URL = "chrome://newtab/";
- public static final String NTP_NATIVE_URL = "chrome-native://newtab/";
- public static final String DOM_DISILLER_URL = "chrome-distiller://url";
- public static final String RED_1 = "https://www.red.com/page1";
- public static final String RED_2 = "https://www.red.com/page2";
- public static final String RED_3 = "https://www.red.com/page3";
- public static final String BLUE_1 = "https://www.blue.com/page1";
- public static final String BLUE_2 = "https://www.blue.com/page2";
- public static final String BLUE_3 = "https://www.blue.com/page3";
- public static final String AMP_URL =
- "https://www.google.com/amp/www.nyt.com/ampthml/blogs.html";
- public static final String AMP_CACHE_URL =
- "https://www.google.com/amp/s/www.nyt.com/ampthml/blogs.html";
- public static final String TEXT_FRAGMENT_URL = "https://www.example.com/#:~:text=selector";
- public static final String MULTI_TEXT_FRAGMENT_URL =
- "https://www.example.com/#:~:text=selector1&text=selector2&text=selector3";
- public static final String INVALID_URL = "http://0x100.0/";
- public static final String GOOGLE_URL = "http://www.google.com/";
- public static final String GOOGLE_URL_DOGS = "http://www.google.com/dogs";
- public static final String GOOGLE_URL_DOGS_FUN = "http://www.google.com/dogs-are-fun";
- public static final String GOOGLE_URL_DOG = "http://www.google.com/dog";
- public static final String GOOGLE_URL_CAT = "http://www.google.com/cat";
- public static final String GOOGLE_URL_PIG = "http://www.google.com/pig";
- public static final String ABOUT_BLANK = "about:blank";
- public static final String CHROME_ABOUT = "chrome://about";
-
- // Map of URL string to GURL serialization.
- /* package */ static final Map<String, String> sGURLMap;
- static {
- Map<String, String> map = new HashMap<>();
- map.put(EXAMPLE_URL,
- "82,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,0,-1,"
- + "false,false,https://www.example.com/");
- map.put(HTTP_URL,
- "81,1,true,0,4,0,-1,0,-1,7,15,0,-1,22,1,0,-1,0,-1,"
- + "false,false,http://www.example.com/");
- map.put(URL_1,
- "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
- + "false,false,https://www.one.com/");
- map.put(URL_1_NUMERAL,
- "75,1,true,0,5,0,-1,0,-1,8,9,0,-1,17,1,0,-1,0,-1,"
- + "false,false,https://www.1.com/");
- map.put(URL_1_WITH_PATH,
- "93,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,15,0,-1,0,-1,"
- + "false,false,https://www.one.com/some_path.html");
- map.put(URL_2,
- "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
- + "false,false,https://www.two.com/");
- map.put(URL_3,
- "80,1,true,0,5,0,-1,0,-1,8,13,0,-1,21,1,0,-1,0,-1,false,false,https://www.three.com/");
- map.put(RED_1,
- "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
- + "false,false,https://www.red.com/page1");
- map.put(RED_2,
- "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
- + "false,false,https://www.red.com/page2");
- map.put(RED_3,
- "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
- + "false,false,https://www.red.com/page3");
- map.put(BLUE_1,
- "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
- + "false,false,https://www.blue.com/page1");
- map.put(BLUE_2,
- "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
- + "false,false,https://www.blue.com/page2");
- map.put(BLUE_3,
- "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
- + "false,false,https://www.blue.com/page3");
- map.put(SEARCH_URL,
- "94,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,7,30,6,0,-1,"
- + "false,false,https://www.google.com/search?q=test");
- map.put(SEARCH_2_URL,
- "95,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,7,30,7,0,-1,"
- + "false,false,https://www.google.com/search?q=query");
- map.put(INITIAL_URL,
- "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
- + "false,false,https://initial.com/");
- map.put(SPECULATED_URL,
- "81,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,1,0,-1,0,-1,"
- + "false,false,https://speculated.com/");
- map.put(NTP_URL,
- "73,1,true,0,6,0,-1,0,-1,9,6,0,-1,15,1,0,-1,0,-1,"
- + "false,false,chrome://newtab/");
- map.put(NTP_NATIVE_URL,
- "82,1,true,0,13,0,-1,0,-1,16,6,0,-1,22,1,0,-1,0,-1,false,false,"
- + "chrome-native://newtab/");
- map.put(DOM_DISILLER_URL,
- "82,1,true,0,16,0,-1,0,-1,19,3,0,-1,22,1,0,-1,0,-1,false,false,"
- + "chrome-distiller://url/");
- map.put(MAPS_URL,
- "82,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,0,-1,false,false,https://maps.google.com/");
- map.put(AMP_URL,
- "116,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,35,0,-1,0,-1,false,false,https://www.google.com/amp/www.nyt.com/ampthml/blogs.html");
- map.put(AMP_CACHE_URL,
- "118,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,37,0,-1,0,-1,false,false,https://www.google.com/amp/s/www.nyt.com/ampthml/blogs.html");
- map.put(TEXT_FRAGMENT_URL,
- "100,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,25,16,false,false,https://www.example.com/#:~:text=selector");
- map.put(MULTI_TEXT_FRAGMENT_URL,
- "131,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,25,47,false,false,https://www.example.com/#:~:text=selector1&text=selector2&text=selector3");
- map.put(INVALID_URL,
- "73,1,false,0,4,0,-1,0,-1,7,7,0,-1,14,1,0,-1,0,-1,false,false,http://0x100.0/");
- map.put(GOOGLE_URL,
- "80,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,1,0,-1,0,-1,false,false,http://www.google.com/");
- map.put(GOOGLE_URL_DOGS,
- "84,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,5,0,-1,0,-1,false,false,http://www.google.com/dogs");
- map.put(GOOGLE_URL_DOGS_FUN,
- "93,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,13,0,-1,0,-1,false,false,http://www.google.com/dogs-are-fun");
- map.put(GOOGLE_URL_DOG,
- "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/dog");
- map.put(GOOGLE_URL_CAT,
- "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/cat");
- map.put(GOOGLE_URL_PIG,
- "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/pig");
- map.put(ABOUT_BLANK,
- "68,1,true,0,5,0,-1,0,-1,0,-1,0,-1,6,5,0,-1,0,-1,false,false,about:blank");
- map.put(CHROME_ABOUT,
- "72,1,true,0,6,0,-1,0,-1,9,5,0,-1,14,1,0,-1,0,-1,false,false,chrome://about/");
- sGURLMap = Collections.unmodifiableMap(map);
- }
-
- /**
- * @return the GURL resulting from parsing the provided url. Must be registered in |sGURLMap|.
- */
- public static GURL getGURL(String url) {
- String serialized = sGURLMap.get(url);
- if (serialized == null) {
- throw new IllegalArgumentException("URL " + url + " not found");
- }
- serialized = serialized.replace(',', GURL.SERIALIZER_DELIMITER);
- GURL gurl = GURL.deserialize(serialized);
- // If you're here looking to use an empty GURL, just use GURL.emptyGURL() directly.
- if (gurl.isEmpty()) {
- throw new RuntimeException("Could not deserialize: " + serialized);
- }
- return gurl;
- }
+ public static final GURL EXAMPLE_URL = new GURL("https://www.example.com/");
+ public static final GURL HTTP_URL = new GURL("http://www.example.com/");
+ public static final GURL URL_1 = new GURL("https://www.one.com/");
+ public static final GURL URL_1_WITH_PATH = new GURL("https://www.one.com/some_path.html");
+ public static final GURL URL_2 = new GURL("https://www.two.com/");
+ public static final GURL URL_3 = new GURL("https://www.three.com/");
+ public static final GURL MAPS_URL = new GURL("https://maps.google.com/");
+ public static final GURL SEARCH_URL = new GURL("https://www.google.com/search?q=test");
+ public static final GURL SEARCH_2_URL = new GURL("https://www.google.com/search?q=query");
+ public static final GURL INITIAL_URL = new GURL("https://initial.com");
+ public static final GURL NTP_URL = new GURL("chrome://newtab/");
+ public static final GURL NTP_NATIVE_URL = new GURL("chrome-native://newtab/");
+ public static final GURL RED_1 = new GURL("https://www.red.com/page1");
+ public static final GURL RED_2 = new GURL("https://www.red.com/page2");
+ public static final GURL RED_3 = new GURL("https://www.red.com/page3");
+ public static final GURL BLUE_1 = new GURL("https://www.blue.com/page1");
+ public static final GURL BLUE_2 = new GURL("https://www.blue.com/page2");
+ public static final GURL BLUE_3 = new GURL("https://www.blue.com/page3");
+ public static final GURL TEXT_FRAGMENT_URL =
+ new GURL("https://www.example.com/#:~:text=selector");
+ public static final GURL INVALID_URL = new GURL("http://0x100.0/");
+ public static final GURL GOOGLE_URL = new GURL("http://www.google.com/");
+ public static final GURL GOOGLE_URL_DOGS = new GURL("http://www.google.com/dogs");
+ public static final GURL GOOGLE_URL_DOG = new GURL("http://www.google.com/dog");
+ public static final GURL GOOGLE_URL_CAT = new GURL("http://www.google.com/cat");
+ public static final GURL ABOUT_BLANK = new GURL("about:blank");
+ public static final GURL CHROME_ABOUT = new GURL("chrome://about");
}
diff --git a/url/android/test/java/src/org/chromium/url/ShadowGURL.java b/url/android/test/java/src/org/chromium/url/ShadowGURL.java
deleted file mode 100644
index 53e1da192..000000000
--- a/url/android/test/java/src/org/chromium/url/ShadowGURL.java
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2021 The Chromium Authors
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package org.chromium.url;
-
-import org.robolectric.annotation.Implementation;
-import org.robolectric.annotation.Implements;
-
-import org.chromium.url.GURL.Natives;
-
-/**
- * Shadow of {@link GURL}. Lets Robolectric tests use {@code GURL} without the native libraries
- * loaded.
- *
- * <p>This shadow can create only GURLs listed in {@link JUnitTestGURLs}.
- */
-@Implements(GURL.class)
-public class ShadowGURL {
- /**
- * The {@link GURL.Natives} implementation used by a shadowed {@link GURL}.
- */
- private static class NativesImpl implements GURL.Natives {
- @Override
- public void init(String url, GURL target) {
- target.initForTesting(JUnitTestGURLs.getGURL(url));
- }
-
- @Override
- public void getOrigin(String spec, boolean isValid, long nativeParsed, GURL target) {
- throw new UnsupportedOperationException(
- "ShadowGURL.NativesImpl#getOrigin is not implemented");
- }
-
- @Override
- public boolean domainIs(String spec, boolean isValid, long nativeParsed, String domain) {
- throw new UnsupportedOperationException(
- "ShadowGURL.NativesImpl#domainIs is not implemented");
- }
-
- @Override
- public long createNative(String spec, boolean isValid, long nativeParsed) {
- throw new UnsupportedOperationException(
- "ShadowGURL.NativesImpl#createNative is not implemented");
- }
- }
- private static final NativesImpl sNativesInstance = new NativesImpl();
-
- /**
- * We could instead shadow {@code GURLJni#get}, but that would require tests using this to load
- * both shadows.
- */
- @Implementation
- protected static Natives getNatives() {
- return sNativesInstance;
- }
-
- @Implementation
- protected static void ensureNativeInitializedForGURL() {
- // Skip native initialization.
- }
-}
diff --git a/url/gurl.cc b/url/gurl.cc
index 9f2e5fad6..018ae1142 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -9,12 +9,12 @@
#include <algorithm>
#include <memory>
#include <ostream>
+#include <string_view>
#include <utility>
#include "base/check_op.h"
#include "base/no_destructor.h"
#include "base/notreached.h"
-#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/trace_event/base_tracing.h"
#include "base/trace_event/memory_usage_estimator.h"
@@ -43,11 +43,11 @@ GURL::GURL(GURL&& other) noexcept
other.parsed_ = url::Parsed();
}
-GURL::GURL(base::StringPiece url_string) {
+GURL::GURL(std::string_view url_string) {
InitCanonical(url_string, true);
}
-GURL::GURL(base::StringPiece16 url_string) {
+GURL::GURL(std::u16string_view url_string) {
InitCanonical(url_string, true);
}
@@ -174,7 +174,7 @@ bool GURL::operator>(const GURL& other) const {
}
// Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::Resolve(base::StringPiece relative) const {
+GURL GURL::Resolve(std::string_view relative) const {
// Not allowed for invalid URLs.
if (!is_valid_)
return GURL();
@@ -200,7 +200,7 @@ GURL GURL::Resolve(base::StringPiece relative) const {
}
// Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::Resolve(base::StringPiece16 relative) const {
+GURL GURL::Resolve(std::u16string_view relative) const {
// Not allowed for invalid URLs.
if (!is_valid_)
return GURL();
@@ -355,7 +355,7 @@ bool GURL::IsAboutSrcdoc() const {
return IsAboutUrl(url::kAboutSrcdocPath);
}
-bool GURL::SchemeIs(base::StringPiece lower_ascii_scheme) const {
+bool GURL::SchemeIs(std::string_view lower_ascii_scheme) const {
DCHECK(base::IsStringASCII(lower_ascii_scheme));
DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
@@ -378,7 +378,7 @@ bool GURL::SchemeIsCryptographic() const {
return SchemeIsCryptographic(scheme_piece());
}
-bool GURL::SchemeIsCryptographic(base::StringPiece lower_ascii_scheme) {
+bool GURL::SchemeIsCryptographic(std::string_view lower_ascii_scheme) {
DCHECK(base::IsStringASCII(lower_ascii_scheme));
DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
@@ -413,13 +413,13 @@ std::string GURL::ExtractFileName() const {
return ComponentString(file_component);
}
-base::StringPiece GURL::PathForRequestPiece() const {
+std::string_view GURL::PathForRequestPiece() const {
DCHECK(parsed_.path.is_nonempty())
<< "Canonical path for requests should be non-empty";
if (parsed_.ref.is_valid()) {
// Clip off the reference when it exists. The reference starts after the
// #-sign, so we have to subtract one to also remove it.
- return base::StringPiece(spec_).substr(
+ return std::string_view(spec_).substr(
parsed_.path.begin, parsed_.ref.begin - parsed_.path.begin - 1);
}
// Compute the actual path length, rather than depending on the spec's
@@ -429,7 +429,7 @@ base::StringPiece GURL::PathForRequestPiece() const {
if (parsed_.query.is_valid())
path_len = parsed_.query.end() - parsed_.path.begin;
- return base::StringPiece(spec_).substr(parsed_.path.begin, path_len);
+ return std::string_view(spec_).substr(parsed_.path.begin, path_len);
}
std::string GURL::PathForRequest() const {
@@ -440,7 +440,7 @@ std::string GURL::HostNoBrackets() const {
return std::string(HostNoBracketsPiece());
}
-base::StringPiece GURL::HostNoBracketsPiece() const {
+std::string_view GURL::HostNoBracketsPiece() const {
// If host looks like an IPv6 literal, strip the square brackets.
url::Component h(parsed_.host);
if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
@@ -454,9 +454,9 @@ std::string GURL::GetContent() const {
return std::string(GetContentPiece());
}
-base::StringPiece GURL::GetContentPiece() const {
+std::string_view GURL::GetContentPiece() const {
if (!is_valid_)
- return base::StringPiece();
+ return std::string_view();
url::Component content_component = parsed_.GetContent();
if (!SchemeIs(url::kJavaScriptScheme) && parsed_.ref.is_valid())
content_component.len -= parsed_.ref.len + 1;
@@ -472,7 +472,7 @@ const GURL& GURL::EmptyGURL() {
return *empty_gurl;
}
-bool GURL::DomainIs(base::StringPiece canonical_domain) const {
+bool GURL::DomainIs(std::string_view canonical_domain) const {
if (!is_valid_)
return false;
@@ -486,8 +486,8 @@ bool GURL::EqualsIgnoringRef(const GURL& other) const {
int ref_position = parsed_.CountCharactersBefore(url::Parsed::REF, true);
int ref_position_other =
other.parsed_.CountCharactersBefore(url::Parsed::REF, true);
- return base::StringPiece(spec_).substr(0, ref_position) ==
- base::StringPiece(other.spec_).substr(0, ref_position_other);
+ return std::string_view(spec_).substr(0, ref_position) ==
+ std::string_view(other.spec_).substr(0, ref_position_other);
}
void GURL::Swap(GURL* other) {
@@ -503,7 +503,7 @@ size_t GURL::EstimateMemoryUsage() const {
(parsed_.inner_parsed() ? sizeof(url::Parsed) : 0);
}
-bool GURL::IsAboutUrl(base::StringPiece allowed_path) const {
+bool GURL::IsAboutUrl(std::string_view allowed_path) const {
if (!SchemeIs(url::kAboutScheme))
return false;
@@ -514,8 +514,8 @@ bool GURL::IsAboutUrl(base::StringPiece allowed_path) const {
}
// static
-bool GURL::IsAboutPath(base::StringPiece actual_path,
- base::StringPiece allowed_path) {
+bool GURL::IsAboutPath(std::string_view actual_path,
+ std::string_view allowed_path) {
if (!base::StartsWith(actual_path, allowed_path))
return false;
@@ -549,22 +549,22 @@ bool operator!=(const GURL& x, const GURL& y) {
return !(x == y);
}
-bool operator==(const GURL& x, const base::StringPiece& spec) {
+bool operator==(const GURL& x, std::string_view spec) {
DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec)
<< "Comparisons of GURLs and strings must ensure as a precondition that "
"the string is fully canonicalized.";
return x.possibly_invalid_spec() == spec;
}
-bool operator==(const base::StringPiece& spec, const GURL& x) {
+bool operator==(std::string_view spec, const GURL& x) {
return x == spec;
}
-bool operator!=(const GURL& x, const base::StringPiece& spec) {
+bool operator!=(const GURL& x, std::string_view spec) {
return !(x == spec);
}
-bool operator!=(const base::StringPiece& spec, const GURL& x) {
+bool operator!=(std::string_view spec, const GURL& x) {
return !(x == spec);
}
diff --git a/url/gurl.h b/url/gurl.h
index 688a1018a..aed95bb4b 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -10,11 +10,11 @@
#include <iosfwd>
#include <memory>
#include <string>
+#include <string_view>
#include "base/component_export.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
-#include "base/strings/string_piece.h"
#include "base/trace_event/base_tracing_forward.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -46,8 +46,8 @@
// will know to escape this and produce the desired result.
class COMPONENT_EXPORT(URL) GURL {
public:
- typedef url::StringPieceReplacements<char> Replacements;
- typedef url::StringPieceReplacements<char16_t> ReplacementsW;
+ using Replacements = url::StringViewReplacements<char>;
+ using ReplacementsW = url::StringViewReplacements<char16_t>;
// Creates an empty, invalid URL.
GURL();
@@ -57,9 +57,9 @@ class COMPONENT_EXPORT(URL) GURL {
GURL(const GURL& other);
GURL(GURL&& other) noexcept;
- // The strings to this contructor should be UTF-8 / UTF-16.
- explicit GURL(base::StringPiece url_string);
- explicit GURL(base::StringPiece16 url_string);
+ // The strings to this constructor should be UTF-8 / UTF-16.
+ explicit GURL(std::string_view url_string);
+ explicit GURL(std::u16string_view url_string);
// Constructor for URLs that have already been parsed and canonicalized. This
// is used for conversions from KURL, for example. The caller must supply all
@@ -152,8 +152,8 @@ class COMPONENT_EXPORT(URL) GURL {
//
// It is an error to resolve a URL relative to an invalid URL. The result
// will be the empty URL.
- GURL Resolve(base::StringPiece relative) const;
- GURL Resolve(base::StringPiece16 relative) const;
+ GURL Resolve(std::string_view relative) const;
+ GURL Resolve(std::u16string_view relative) const;
// Creates a new GURL by replacing the current URL's components with the
// supplied versions. See the Replacements class in url_canon.h for more.
@@ -238,7 +238,7 @@ class COMPONENT_EXPORT(URL) GURL {
// Returns true if the given parameter (should be lower-case ASCII to match
// the canonicalized scheme) is the scheme for this URL. Do not include a
// colon.
- bool SchemeIs(base::StringPiece lower_ascii_scheme) const;
+ bool SchemeIs(std::string_view lower_ascii_scheme) const;
// Returns true if the scheme is "http" or "https".
bool SchemeIsHTTPOrHTTPS() const;
@@ -267,7 +267,7 @@ class COMPONENT_EXPORT(URL) GURL {
bool SchemeIsCryptographic() const;
// As above, but static. Parameter should be lower-case ASCII.
- static bool SchemeIsCryptographic(base::StringPiece lower_ascii_scheme);
+ static bool SchemeIsCryptographic(std::string_view lower_ascii_scheme);
// Returns true if the scheme is "blob".
bool SchemeIsBlob() const {
@@ -286,7 +286,7 @@ class COMPONENT_EXPORT(URL) GURL {
// It is an error to get the content of an invalid URL: the result will be an
// empty string.
std::string GetContent() const;
- base::StringPiece GetContentPiece() const;
+ std::string_view GetContentPiece() const;
// Returns true if the hostname is an IP address. Note: this function isn't
// as cheap as a simple getter because it re-parses the hostname to verify.
@@ -297,7 +297,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string scheme() const {
return ComponentString(parsed_.scheme);
}
- base::StringPiece scheme_piece() const {
+ std::string_view scheme_piece() const {
return ComponentStringPiece(parsed_.scheme);
}
@@ -305,7 +305,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string username() const {
return ComponentString(parsed_.username);
}
- base::StringPiece username_piece() const {
+ std::string_view username_piece() const {
return ComponentStringPiece(parsed_.username);
}
@@ -313,7 +313,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string password() const {
return ComponentString(parsed_.password);
}
- base::StringPiece password_piece() const {
+ std::string_view password_piece() const {
return ComponentStringPiece(parsed_.password);
}
@@ -327,7 +327,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string host() const {
return ComponentString(parsed_.host);
}
- base::StringPiece host_piece() const {
+ std::string_view host_piece() const {
return ComponentStringPiece(parsed_.host);
}
@@ -338,7 +338,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string port() const {
return ComponentString(parsed_.port);
}
- base::StringPiece port_piece() const {
+ std::string_view port_piece() const {
return ComponentStringPiece(parsed_.port);
}
@@ -348,7 +348,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string path() const {
return ComponentString(parsed_.path);
}
- base::StringPiece path_piece() const {
+ std::string_view path_piece() const {
return ComponentStringPiece(parsed_.path);
}
@@ -357,7 +357,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string query() const {
return ComponentString(parsed_.query);
}
- base::StringPiece query_piece() const {
+ std::string_view query_piece() const {
return ComponentStringPiece(parsed_.query);
}
@@ -367,7 +367,7 @@ class COMPONENT_EXPORT(URL) GURL {
std::string ref() const {
return ComponentString(parsed_.ref);
}
- base::StringPiece ref_piece() const {
+ std::string_view ref_piece() const {
return ComponentStringPiece(parsed_.ref);
}
@@ -389,14 +389,14 @@ class COMPONENT_EXPORT(URL) GURL {
std::string PathForRequest() const;
// Returns the same characters as PathForRequest(), avoiding a copy.
- base::StringPiece PathForRequestPiece() const;
+ std::string_view PathForRequestPiece() const;
// Returns the host, excluding the square brackets surrounding IPv6 address
// literals. This can be useful for passing to getaddrinfo().
std::string HostNoBrackets() const;
// Returns the same characters as HostNoBrackets(), avoiding a copy.
- base::StringPiece HostNoBracketsPiece() const;
+ std::string_view HostNoBracketsPiece() const;
// Returns true if this URL's host matches or is in the same domain as
// the given input string. For example, if the hostname of the URL is
@@ -409,7 +409,7 @@ class COMPONENT_EXPORT(URL) GURL {
// This call is more efficient than getting the host and checking whether the
// host has the specific domain or not because no copies or object
// constructions are done.
- bool DomainIs(base::StringPiece canonical_domain) const;
+ bool DomainIs(std::string_view canonical_domain) const;
// Checks whether or not two URLs differ only in the ref (the part after
// the # character).
@@ -440,8 +440,8 @@ class COMPONENT_EXPORT(URL) GURL {
size_t EstimateMemoryUsage() const;
// Helper used by GURL::IsAboutUrl and KURL::IsAboutURL.
- static bool IsAboutPath(base::StringPiece actual_path,
- base::StringPiece allowed_path);
+ static bool IsAboutPath(std::string_view actual_path,
+ std::string_view allowed_path);
void WriteIntoTrace(perfetto::TracedValue context) const;
@@ -460,17 +460,17 @@ class COMPONENT_EXPORT(URL) GURL {
void InitializeFromCanonicalSpec();
// Helper used by IsAboutBlank and IsAboutSrcdoc.
- bool IsAboutUrl(base::StringPiece allowed_path) const;
+ bool IsAboutUrl(std::string_view allowed_path) const;
// Returns the substring of the input identified by the given component.
std::string ComponentString(const url::Component& comp) const {
return std::string(ComponentStringPiece(comp));
}
- base::StringPiece ComponentStringPiece(const url::Component& comp) const {
+ std::string_view ComponentStringPiece(const url::Component& comp) const {
if (comp.is_empty())
- return base::StringPiece();
- return base::StringPiece(spec_).substr(static_cast<size_t>(comp.begin),
- static_cast<size_t>(comp.len));
+ return std::string_view();
+ return std::string_view(spec_).substr(static_cast<size_t>(comp.begin),
+ static_cast<size_t>(comp.len));
}
void ProcessFileSystemURLAfterReplaceComponents();
@@ -501,13 +501,13 @@ COMPONENT_EXPORT(URL) bool operator!=(const GURL& x, const GURL& y);
// url == GURL(spec) where |spec| is known (i.e. constants). This is to prevent
// needlessly re-parsing |spec| into a temporary GURL.
COMPONENT_EXPORT(URL)
-bool operator==(const GURL& x, const base::StringPiece& spec);
+bool operator==(const GURL& x, std::string_view spec);
COMPONENT_EXPORT(URL)
-bool operator==(const base::StringPiece& spec, const GURL& x);
+bool operator==(std::string_view spec, const GURL& x);
COMPONENT_EXPORT(URL)
-bool operator!=(const GURL& x, const base::StringPiece& spec);
+bool operator!=(const GURL& x, std::string_view spec);
COMPONENT_EXPORT(URL)
-bool operator!=(const base::StringPiece& spec, const GURL& x);
+bool operator!=(std::string_view spec, const GURL& x);
// DEBUG_ALIAS_FOR_GURL(var_name, url) copies |url| into a new stack-allocated
// variable named |<var_name>|. This helps ensure that the value of |url| gets
diff --git a/url/gurl_abstract_tests.h b/url/gurl_abstract_tests.h
index 3cde84205..6ef976c13 100644
--- a/url/gurl_abstract_tests.h
+++ b/url/gurl_abstract_tests.h
@@ -11,7 +11,7 @@
// by parametrizing the tests with a class that has to expose the following
// members:
// using UrlType = ...;
-// static UrlType CreateUrlFromString(base::StringPiece s);
+// static UrlType CreateUrlFromString(std::string_view s);
// static bool IsAboutBlank(const UrlType& url);
// static bool IsAboutSrcdoc(const UrlType& url);
template <typename TUrlTraits>
@@ -23,7 +23,7 @@ class AbstractUrlTest : public testing::Test {
// avoid hitting: explicit qualification required to use member 'IsAboutBlank'
// from dependent base class.
using UrlType = typename TUrlTraits::UrlType;
- UrlType CreateUrlFromString(base::StringPiece s) {
+ UrlType CreateUrlFromString(std::string_view s) {
return TUrlTraits::CreateUrlFromString(s);
}
bool IsAboutBlank(const UrlType& url) {
diff --git a/url/gurl_fuzzer.cc b/url/gurl_fuzzer.cc
index 029a387e4..34c3773f8 100644
--- a/url/gurl_fuzzer.cc
+++ b/url/gurl_fuzzer.cc
@@ -45,15 +45,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size < 1)
return 0;
{
- base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
- size);
+ std::string_view string_piece_input(reinterpret_cast<const char*>(data),
+ size);
const GURL url_from_string_piece(string_piece_input);
CheckIdempotency(url_from_string_piece);
CheckReplaceComponentsPreservesSpec(url_from_string_piece);
}
- // Test for StringPiece16 if size is even.
+ // Test for std::u16string_view if size is even.
if (size % sizeof(char16_t) == 0) {
- base::StringPiece16 string_piece_input16(
+ std::u16string_view string_piece_input16(
reinterpret_cast<const char16_t*>(data), size / sizeof(char16_t));
const GURL url_from_string_piece16(string_piece_input16);
CheckIdempotency(url_from_string_piece16);
@@ -69,7 +69,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
*reinterpret_cast<const size_t*>(data) % (size - size_t_bytes);
std::string relative_string(
reinterpret_cast<const char*>(data + size_t_bytes), relative_size);
- base::StringPiece string_piece_part_input(
+ std::string_view string_piece_part_input(
reinterpret_cast<const char*>(data + size_t_bytes + relative_size),
size - relative_size - size_t_bytes);
const GURL url_from_string_piece_part(string_piece_part_input);
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index af8421d97..284dfbc68 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -838,8 +838,8 @@ TEST(GURLTest, DomainIs) {
GURL url_with_escape_chars("https://www.,.test");
EXPECT_TRUE(url_with_escape_chars.is_valid());
- EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test");
- EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test"));
+ EXPECT_EQ(url_with_escape_chars.host(), "www.,.test");
+ EXPECT_TRUE(url_with_escape_chars.DomainIs(",.test"));
}
TEST(GURLTest, DomainIsTerminatingDotBehavior) {
@@ -1167,7 +1167,7 @@ class GURLTestTraits {
public:
using UrlType = GURL;
- static UrlType CreateUrlFromString(base::StringPiece s) { return GURL(s); }
+ static UrlType CreateUrlFromString(std::string_view s) { return GURL(s); }
static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
diff --git a/url/ipc/BUILD.gn b/url/ipc/BUILD.gn
index d7801af54..36fba9a89 100644
--- a/url/ipc/BUILD.gn
+++ b/url/ipc/BUILD.gn
@@ -18,6 +18,7 @@ component("url_ipc") {
"//url",
]
deps = [ "//base" ]
+ configs += [ "//build/config/compiler:wexit_time_destructors" ]
}
# IPC unit tests aren't build on iOS.
diff --git a/url/mojom/BUILD.gn b/url/mojom/BUILD.gn
index a936a432b..7a35b09f4 100644
--- a/url/mojom/BUILD.gn
+++ b/url/mojom/BUILD.gn
@@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+import("//build/config/chromeos/ui_mode.gni")
import("//mojo/public/tools/bindings/mojom.gni")
mojom("url_mojom_gurl") {
@@ -42,6 +43,13 @@ mojom("url_mojom_gurl") {
]
webui_module_path = "chrome://resources/mojo/url/mojom"
+
+ # Used by Closure compiler targets in ash/webui/help_app_ui/, so this needs
+ # to generate WebUI bindings in JavaScript on Ash.
+ use_typescript_sources = !is_chromeos_ash
+
+ # Legacy JS bindings are needed for Blink regardless of platform.
+ generate_legacy_js_bindings = true
}
mojom("url_mojom_origin") {
@@ -86,6 +94,8 @@ mojom("url_mojom_origin") {
]
webui_module_path = "chrome://resources/mojo/url/mojom"
+ use_typescript_sources = true
+ generate_legacy_js_bindings = true
}
mojom("url_mojom_scheme_host_port") {
@@ -138,4 +148,6 @@ component("mojom_traits") {
"//mojo/public/cpp/base:shared_typemap_traits",
"//url",
]
+
+ configs += [ "//build/config/compiler:wexit_time_destructors" ]
}
diff --git a/url/mojom/origin_mojom_traits.cc b/url/mojom/origin_mojom_traits.cc
index 9e8475ac5..5b728b46b 100644
--- a/url/mojom/origin_mojom_traits.cc
+++ b/url/mojom/origin_mojom_traits.cc
@@ -4,7 +4,7 @@
#include "url/mojom/origin_mojom_traits.h"
-#include "base/strings/string_piece.h"
+#include <string_view>
namespace mojo {
@@ -12,7 +12,7 @@ namespace mojo {
bool StructTraits<url::mojom::OriginDataView, url::Origin>::Read(
url::mojom::OriginDataView data,
url::Origin* out) {
- base::StringPiece scheme, host;
+ std::string_view scheme, host;
absl::optional<base::UnguessableToken> nonce_if_opaque;
if (!data.ReadScheme(&scheme) || !data.ReadHost(&host) ||
!data.ReadNonceIfOpaque(&nonce_if_opaque))
diff --git a/url/mojom/scheme_host_port_mojom_traits.cc b/url/mojom/scheme_host_port_mojom_traits.cc
index 63f6af4c1..01a50faf4 100644
--- a/url/mojom/scheme_host_port_mojom_traits.cc
+++ b/url/mojom/scheme_host_port_mojom_traits.cc
@@ -4,7 +4,8 @@
#include "url/mojom/scheme_host_port_mojom_traits.h"
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "url/mojom/scheme_host_port.mojom-shared.h"
#include "url/scheme_host_port.h"
@@ -13,7 +14,7 @@ namespace mojo {
// static
bool StructTraits<url::mojom::SchemeHostPortDataView, url::SchemeHostPort>::
Read(url::mojom::SchemeHostPortDataView data, url::SchemeHostPort* out) {
- base::StringPiece scheme, host;
+ std::string_view scheme, host;
if (!data.ReadScheme(&scheme) || !data.ReadHost(&host))
return false;
diff --git a/url/mojom/url_gurl_mojom_traits.cc b/url/mojom/url_gurl_mojom_traits.cc
index 97b301a0a..722d6ce92 100644
--- a/url/mojom/url_gurl_mojom_traits.cc
+++ b/url/mojom/url_gurl_mojom_traits.cc
@@ -9,23 +9,23 @@
namespace mojo {
// static
-base::StringPiece StructTraits<url::mojom::UrlDataView, GURL>::url(
+std::string_view StructTraits<url::mojom::UrlDataView, GURL>::url(
const GURL& r) {
if (r.possibly_invalid_spec().length() > url::kMaxURLChars || !r.is_valid()) {
- return base::StringPiece();
+ return std::string_view();
}
- return base::StringPiece(r.possibly_invalid_spec().c_str(),
- r.possibly_invalid_spec().length());
+ return r.possibly_invalid_spec();
}
// static
bool StructTraits<url::mojom::UrlDataView, GURL>::Read(
url::mojom::UrlDataView data,
GURL* out) {
- base::StringPiece url_string;
- if (!data.ReadUrl(&url_string))
+ std::string_view url_string;
+ if (!data.ReadUrl(&url_string)) {
return false;
+ }
if (url_string.length() > url::kMaxURLChars)
return false;
diff --git a/url/mojom/url_gurl_mojom_traits.h b/url/mojom/url_gurl_mojom_traits.h
index 19ac049c8..ae29cc3f7 100644
--- a/url/mojom/url_gurl_mojom_traits.h
+++ b/url/mojom/url_gurl_mojom_traits.h
@@ -5,8 +5,9 @@
#ifndef URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
#define URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
+#include <string_view>
+
#include "base/component_export.h"
-#include "base/strings/string_piece.h"
#include "mojo/public/cpp/bindings/struct_traits.h"
#include "url/gurl.h"
#include "url/mojom/url.mojom-shared.h"
@@ -16,7 +17,7 @@ namespace mojo {
template <>
struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
StructTraits<url::mojom::UrlDataView, GURL> {
- static base::StringPiece url(const GURL& r);
+ static std::string_view url(const GURL& r);
static bool Read(url::mojom::UrlDataView data, GURL* out);
};
diff --git a/url/origin.cc b/url/origin.cc
index 38be245a4..ce3e080ba 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -9,6 +9,7 @@
#include <algorithm>
#include <ostream>
#include <string>
+#include <string_view>
#include <tuple>
#include <utility>
@@ -20,8 +21,8 @@
#include "base/debug/crash_logging.h"
#include "base/pickle.h"
#include "base/strings/strcat.h"
-#include "base/strings/string_piece.h"
#include "base/trace_event/base_tracing.h"
+#include "base/trace_event/memory_usage_estimator.h"
#include "base/unguessable_token.h"
#include "url/gurl.h"
#include "url/scheme_host_port.h"
@@ -78,8 +79,8 @@ Origin::~Origin() = default;
// static
absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
- base::StringPiece scheme,
- base::StringPiece host,
+ std::string_view scheme,
+ std::string_view host,
uint16_t port) {
SchemeHostPort tuple(std::string(scheme), std::string(host), port,
SchemeHostPort::CHECK_CANONICALIZATION);
@@ -90,8 +91,8 @@ absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
// static
absl::optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
- base::StringPiece precursor_scheme,
- base::StringPiece precursor_host,
+ std::string_view precursor_scheme,
+ std::string_view precursor_host,
uint16_t precursor_port,
const Origin::Nonce& nonce) {
SchemeHostPort precursor(std::string(precursor_scheme),
@@ -248,7 +249,7 @@ bool Origin::CanBeDerivedFrom(const GURL& url) const {
return url.scheme() == tuple_.scheme();
}
-bool Origin::DomainIs(base::StringPiece canonical_domain) const {
+bool Origin::DomainIs(std::string_view canonical_domain) const {
return !opaque() && url::DomainIs(tuple_.host(), canonical_domain);
}
@@ -393,6 +394,10 @@ void Origin::WriteIntoTrace(perfetto::TracedValue context) const {
std::move(context).WriteString(GetDebugString());
}
+size_t Origin::EstimateMemoryUsage() const {
+ return base::trace_event::EstimateMemoryUsage(tuple_);
+}
+
std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
out << origin.GetDebugString();
return out;
diff --git a/url/origin.h b/url/origin.h
index a0575338a..eaf109cbc 100644
--- a/url/origin.h
+++ b/url/origin.h
@@ -9,32 +9,24 @@
#include <memory>
#include <string>
+#include <string_view>
#include "base/component_export.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/gtest_prod_util.h"
-#include "base/strings/string_piece_forward.h"
#include "base/strings/string_util.h"
#include "base/trace_event/base_tracing_forward.h"
#include "base/unguessable_token.h"
#include "build/build_config.h"
#include "build/buildflag.h"
+#include "build/robolectric_buildflags.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/scheme_host_port.h"
-#if BUILDFLAG(IS_ANDROID)
-#include <jni.h>
-
-namespace base {
-namespace android {
-template <typename>
-class ScopedJavaLocalRef;
-template <typename>
-class JavaRef;
-} // namespace android
-} // namespace base
-#endif // BUILDFLAG(IS_ANDROID)
+#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
+#include "base/android/jni_android.h"
+#endif
class GURL;
@@ -199,8 +191,8 @@ class COMPONENT_EXPORT(URL) Origin {
// dangerous recanonicalization); other potential callers should prefer the
// 'GURL'-based constructor.
static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
- base::StringPiece scheme,
- base::StringPiece host,
+ std::string_view scheme,
+ std::string_view host,
uint16_t port);
// Creates an origin without sanity checking that the host is canonicalized.
@@ -285,7 +277,7 @@ class COMPONENT_EXPORT(URL) Origin {
GURL GetURL() const;
// Same as GURL::DomainIs. If |this| origin is opaque, then returns false.
- bool DomainIs(base::StringPiece canonical_domain) const;
+ bool DomainIs(std::string_view canonical_domain) const;
// Allows Origin to be used as a key in STL (for example, a std::set or
// std::map).
@@ -314,8 +306,8 @@ class COMPONENT_EXPORT(URL) Origin {
// and precursor information.
std::string GetDebugString(bool include_nonce = true) const;
-#if BUILDFLAG(IS_ANDROID)
- base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const;
+#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
+ base::android::ScopedJavaLocalRef<jobject> ToJavaObject() const;
static Origin FromJavaObject(
const base::android::JavaRef<jobject>& java_origin);
static jlong CreateNative(JNIEnv* env,
@@ -329,7 +321,18 @@ class COMPONENT_EXPORT(URL) Origin {
void WriteIntoTrace(perfetto::TracedValue context) const;
+ // Estimates dynamic memory usage.
+ // See base/trace_event/memory_usage_estimator.h for more info.
+ size_t EstimateMemoryUsage() const;
+
private:
+#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
+ friend Origin CreateOpaqueOriginForAndroid(
+ const std::string& scheme,
+ const std::string& host,
+ uint16_t port,
+ const base::UnguessableToken& nonce_token);
+#endif
friend class blink::SecurityOrigin;
friend class blink::SecurityOriginTest;
friend class blink::StorageKey;
@@ -415,8 +418,8 @@ class COMPONENT_EXPORT(URL) Origin {
// back and forth over IPC (as transitioning through GURL would risk
// potentially dangerous recanonicalization).
static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
- base::StringPiece precursor_scheme,
- base::StringPiece precursor_host,
+ std::string_view precursor_scheme,
+ std::string_view precursor_host,
uint16_t precursor_port,
const Nonce& nonce);
diff --git a/url/origin_abstract_tests.cc b/url/origin_abstract_tests.cc
index 1bc032e4e..175abfffa 100644
--- a/url/origin_abstract_tests.cc
+++ b/url/origin_abstract_tests.cc
@@ -29,7 +29,7 @@ void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
}
// static
-Origin UrlOriginTestTraits::CreateOriginFromString(base::StringPiece s) {
+Origin UrlOriginTestTraits::CreateOriginFromString(std::string_view s) {
return Origin::Create(GURL(s));
}
@@ -40,7 +40,7 @@ Origin UrlOriginTestTraits::CreateUniqueOpaqueOrigin() {
// static
Origin UrlOriginTestTraits::CreateWithReferenceOrigin(
- base::StringPiece url,
+ std::string_view url,
const Origin& reference_origin) {
return Origin::Resolve(GURL(url), reference_origin);
}
@@ -94,7 +94,7 @@ std::string UrlOriginTestTraits::Serialize(const Origin& origin) {
}
// static
-bool UrlOriginTestTraits::IsValidUrl(base::StringPiece str) {
+bool UrlOriginTestTraits::IsValidUrl(std::string_view str) {
return GURL(str).is_valid();
}
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h
index 63dded619..cb6cf9b40 100644
--- a/url/origin_abstract_tests.h
+++ b/url/origin_abstract_tests.h
@@ -6,10 +6,10 @@
#define URL_ORIGIN_ABSTRACT_TESTS_H_
#include <string>
+#include <string_view>
#include <type_traits>
#include "base/containers/contains.h"
-#include "base/strings/string_piece.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/origin.h"
@@ -28,10 +28,10 @@ class UrlOriginTestTraits {
using OriginType = Origin;
// Constructing an origin.
- static OriginType CreateOriginFromString(base::StringPiece s);
+ static OriginType CreateOriginFromString(std::string_view s);
static OriginType CreateUniqueOpaqueOrigin();
static OriginType CreateWithReferenceOrigin(
- base::StringPiece url,
+ std::string_view url,
const OriginType& reference_origin);
static OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin);
@@ -51,7 +51,7 @@ class UrlOriginTestTraits {
//
// TODO(lukasza): Consider merging together OriginTraitsBase here and
// UrlTraitsBase in //url/gurl_abstract_tests.h.
- static bool IsValidUrl(base::StringPiece str);
+ static bool IsValidUrl(std::string_view str);
// Only static members = no constructors are needed.
UrlOriginTestTraits() = delete;
@@ -95,13 +95,13 @@ class AbstractOriginTest : public testing::Test {
// avoid hitting: explicit qualification required to use member 'IsOpaque'
// from dependent base class.
using OriginType = typename TOriginTraits::OriginType;
- OriginType CreateOriginFromString(base::StringPiece s) {
+ OriginType CreateOriginFromString(std::string_view s) {
return TOriginTraits::CreateOriginFromString(s);
}
OriginType CreateUniqueOpaqueOrigin() {
return TOriginTraits::CreateUniqueOpaqueOrigin();
}
- OriginType CreateWithReferenceOrigin(base::StringPiece url,
+ OriginType CreateWithReferenceOrigin(std::string_view url,
const OriginType& reference_origin) {
return TOriginTraits::CreateWithReferenceOrigin(url, reference_origin);
}
@@ -132,7 +132,7 @@ class AbstractOriginTest : public testing::Test {
std::string Serialize(const OriginType& origin) {
return TOriginTraits::Serialize(origin);
}
- bool IsValidUrl(base::StringPiece str) {
+ bool IsValidUrl(std::string_view str) {
return TOriginTraits::IsValidUrl(str);
}
@@ -213,7 +213,7 @@ class AbstractOriginTest : public testing::Test {
VerifyOriginInvariants(origin);
}
- void TestUniqueOpaqueOrigin(base::StringPiece test_input) {
+ void TestUniqueOpaqueOrigin(std::string_view test_input) {
auto origin = this->CreateOriginFromString(test_input);
this->VerifyUniqueOpaqueOriginInvariants(origin);
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index 47cca812a..5bca5761d 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -76,8 +76,8 @@ class OriginTest : public ::testing::Test {
// Wrappers around url::Origin methods to expose it to tests.
absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
- base::StringPiece precursor_scheme,
- base::StringPiece precursor_host,
+ std::string_view precursor_scheme,
+ std::string_view precursor_host,
uint16_t precursor_port,
const Origin::Nonce& nonce) {
return Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
@@ -371,8 +371,8 @@ TEST_F(OriginTest, UnsafelyCreateUniqueOnInvalidInput) {
TEST_F(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
struct TestCases {
- base::StringPiece scheme;
- base::StringPiece host;
+ std::string_view scheme;
+ std::string_view host;
uint16_t port = 80;
} cases[] = {{{"http\0more", 9}, {"example.com", 11}},
{{"http\0", 5}, {"example.com", 11}},
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
index 490ae9a78..6b617dcd1 100644
--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -8,6 +8,7 @@
#include <string.h>
#include <ostream>
+#include <string_view>
#include <tuple>
#include "base/check_op.h"
@@ -15,7 +16,7 @@
#include "base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
-#include "base/strings/string_piece.h"
+#include "base/trace_event/memory_usage_estimator.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -27,7 +28,7 @@ namespace url {
namespace {
-bool IsCanonicalHost(const base::StringPiece& host) {
+bool IsCanonicalHost(const std::string_view& host) {
std::string canon_host;
// Try to canonicalize the host (copy/pasted from net/base. :( ).
@@ -55,8 +56,8 @@ bool IsCanonicalHost(const base::StringPiece& host) {
// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
// behavior between these 2 layers, but we should avoid introducing new
// differences).
-bool IsValidInput(const base::StringPiece& scheme,
- const base::StringPiece& host,
+bool IsValidInput(const std::string_view& scheme,
+ const std::string_view& host,
uint16_t port,
SchemeHostPort::ConstructPolicy policy) {
// Empty schemes are never valid.
@@ -158,8 +159,8 @@ SchemeHostPort::SchemeHostPort(std::string scheme,
<< " Port: " << port;
}
-SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
- base::StringPiece host,
+SchemeHostPort::SchemeHostPort(std::string_view scheme,
+ std::string_view host,
uint16_t port)
: SchemeHostPort(std::string(scheme),
std::string(host),
@@ -170,8 +171,8 @@ SchemeHostPort::SchemeHostPort(const GURL& url) {
if (!url.is_valid())
return;
- base::StringPiece scheme = url.scheme_piece();
- base::StringPiece host = url.host_piece();
+ std::string_view scheme = url.scheme_piece();
+ std::string_view host = url.host_piece();
// A valid GURL never returns PORT_INVALID.
int port = url.EffectiveIntPort();
@@ -229,6 +230,11 @@ GURL SchemeHostPort::GetURL() const {
return GURL(std::move(serialized), parsed, true);
}
+size_t SchemeHostPort::EstimateMemoryUsage() const {
+ return base::trace_event::EstimateMemoryUsage(scheme_) +
+ base::trace_event::EstimateMemoryUsage(host_);
+}
+
bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
return std::tie(port_, scheme_, host_) <
std::tie(other.port_, other.scheme_, other.host_);
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h
index a98e7affd..b649c922b 100644
--- a/url/scheme_host_port.h
+++ b/url/scheme_host_port.h
@@ -8,9 +8,9 @@
#include <stdint.h>
#include <string>
+#include <string_view>
#include "base/component_export.h"
-#include "base/strings/string_piece.h"
class GURL;
@@ -84,9 +84,7 @@ class COMPONENT_EXPORT(URL) SchemeHostPort {
// ports (e.g. 'file').
//
// Copies the data in |scheme| and |host|.
- SchemeHostPort(base::StringPiece scheme,
- base::StringPiece host,
- uint16_t port);
+ SchemeHostPort(std::string_view scheme, std::string_view host, uint16_t port);
// Metadata influencing whether or not the constructor should sanity check
// host canonicalization.
@@ -139,6 +137,10 @@ class COMPONENT_EXPORT(URL) SchemeHostPort {
// For example, see crrev.com/c/3637099/comments/782360d0_e14757be.
GURL GetURL() const;
+ // Estimates dynamic memory usage.
+ // See base/trace_event/memory_usage_estimator.h for more info.
+ size_t EstimateMemoryUsage() const;
+
// Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
// are exact matches.
//
diff --git a/url/url_canon.h b/url/url_canon.h
index 94b44426f..d3a7fabf0 100644
--- a/url/url_canon.h
+++ b/url/url_canon.h
@@ -8,6 +8,8 @@
#include <stdlib.h>
#include <string.h>
+#include <string_view>
+
#include "base/component_export.h"
#include "base/export_template.h"
#include "base/memory/raw_ptr_exclusion.h"
@@ -57,6 +59,11 @@ class CanonOutputT {
// write the data, then use set_size() to declare the new length().
size_t capacity() const { return buffer_len_; }
+ // Returns the contents of the buffer as a string_view.
+ std::basic_string_view<T> view() const {
+ return std::basic_string_view<T>(data(), length());
+ }
+
// Called by the user of this class to get the output. The output will NOT
// be NULL-terminated. Call length() to get the
// length.
@@ -102,6 +109,8 @@ class CanonOutputT {
cur_len_ += str_len;
}
+ void Append(std::basic_string_view<T> str) { Append(str.data(), str.size()); }
+
void ReserveSizeIfNeeded(size_t estimated_size) {
// Reserve a bit extra to account for escaped chars.
if (estimated_size > buffer_len_)
@@ -278,7 +287,7 @@ const char16_t* RemoveURLWhitespace(const char16_t* input,
//
// On error, returns false. The output in this case is undefined.
COMPONENT_EXPORT(URL)
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output);
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output);
// Piece-by-piece canonicalizers ----------------------------------------------
//
diff --git a/url/url_canon_filesystemurl.cc b/url/url_canon_filesystemurl.cc
index 0472484de..f1a9f1c5c 100644
--- a/url/url_canon_filesystemurl.cc
+++ b/url/url_canon_filesystemurl.cc
@@ -36,7 +36,7 @@ bool DoCanonicalizeFileSystemURL(const CHAR* spec,
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
- output->Append("filesystem:", 11);
+ output->Append("filesystem:");
new_parsed->scheme.len = 10;
if (!inner_parsed || !inner_parsed->scheme.is_valid())
@@ -46,7 +46,7 @@ bool DoCanonicalizeFileSystemURL(const CHAR* spec,
SchemeType inner_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) {
new_inner_parsed.scheme.begin = output->length();
- output->Append("file://", 7);
+ output->Append("file://");
new_inner_parsed.scheme.len = 4;
success &= CanonicalizePath(spec, inner_parsed->path, output,
&new_inner_parsed.path);
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc
index b45114d77..802fe427c 100644
--- a/url/url_canon_fileurl.cc
+++ b/url/url_canon_fileurl.cc
@@ -4,7 +4,8 @@
// Functions for canonicalizing "file:" URLs.
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -18,13 +19,13 @@ namespace {
bool IsLocalhost(const char* spec, int begin, int end) {
if (begin > end)
return false;
- return base::StringPiece(&spec[begin], end - begin) == "localhost";
+ return std::string_view(&spec[begin], end - begin) == "localhost";
}
bool IsLocalhost(const char16_t* spec, int begin, int end) {
if (begin > end)
return false;
- return base::StringPiece16(&spec[begin], end - begin) == u"localhost";
+ return std::u16string_view(&spec[begin], end - begin) == u"localhost";
}
template <typename CHAR>
@@ -134,7 +135,7 @@ bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
- output->Append("file://", 7);
+ output->Append("file://");
new_parsed->scheme.len = 4;
// If the host is localhost, and the path starts with a Windows drive letter,
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index d3b1222f1..4e45839f8 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -6,11 +6,14 @@
#include "base/cpu_reduction_experiment.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_features.h"
namespace url {
namespace {
+// clang-format off
+//
// For reference, here's what IE supports:
// Key: 0 (disallowed: failure if present in the input)
// + (allowed either escaped or unescaped, and unmodified)
@@ -37,19 +40,15 @@ namespace {
// I also didn't test if characters affecting HTML parsing are allowed
// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
// Surprisingly, space is accepted in the input and always escaped.
-
+//
+// TODO(https://crbug.com/1416013): Remove the above historical reference
+// information once we are fully compliant with the URL Standard.
+//
// This table lists the canonical version of all characters we allow in the
// input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar
// value to indicate that this character should be escaped. We are a little more
// restrictive than IE, but less restrictive than Firefox.
//
-// Note that we disallow the % character. We will allow it when part of an
-// escape sequence, of course, but this disallows "%25". Even though IE allows
-// it, allowing it would put us in a funny state. If there was an invalid
-// escape sequence like "%zz", we'll add "%25zz" to the output and fail.
-// Allowing percents means we'll succeed a second time, so validity would change
-// based on how many times you run the canonicalizer. We prefer to always report
-// the same vailidity, so reject this.
const unsigned char kEsc = 0xff;
const unsigned char kHostCharLookup[0x80] = {
// 00-1f: all are invalid
@@ -68,6 +67,27 @@ const unsigned char kHostCharLookup[0x80] = {
// p q r s t u v w x y z { | } ~
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc, 0 , 0 };
+// The following table is used when IsUsingStandardCompliantHostCharacters()
+// returns true. See https://crbug.com/1416013 for details. At present, ' '
+// (SPACE) and '*' (asterisk) still do not comply with the URL Standard.
+const unsigned char kStandardCompliantHostCharLookup[0x80] = {
+// 00-1f: all are invalid
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+// ' ' ! " # $ % & ' ( ) * + , - . /
+ kEsc,'!', '"', 0, '$', 0, '&', '\'','(', ')', kEsc, '+', ',', '-', '.', 0,
+// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';' , 0, '=', 0, 0,
+// @ A B C D E F G H I J K L M N O
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+// P Q R S T U V W X Y Z [ \ ] ^ _
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', 0, ']', 0, '_',
+// ` a b c d e f g h i j k l m n o
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+// p q r s t u v w x y z { | } ~
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', 0, '}', '~', 0 };
+// clang-format on
+
// RFC1034 maximum FQDN length.
constexpr size_t kMaxHostLength = 253;
@@ -149,7 +169,12 @@ bool DoSimpleHost(const INCHAR* host,
if (source < 0x80) {
// We have ASCII input, we can use our lookup table.
- unsigned char replacement = kHostCharLookup[source];
+ unsigned char replacement;
+ if (url::IsUsingStandardCompliantHostCharacters()) {
+ replacement = kStandardCompliantHostCharLookup[source];
+ } else {
+ replacement = kHostCharLookup[source];
+ }
if (!replacement) {
// Invalid character, add it as percent-escaped and mark as failed.
AppendEscapedChar(source, output);
@@ -189,9 +214,7 @@ bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
}
StackBufferW wide_output;
- if (!IDNToASCII(url_escaped_host.data(),
- url_escaped_host.length(),
- &wide_output)) {
+ if (!IDNToASCII(url_escaped_host.view(), &wide_output)) {
// Some error, give up. This will write some reasonable looking
// representation of the string to the output.
AppendInvalidNarrowString(src, 0, src_len, output);
@@ -381,7 +404,7 @@ void DoHost(const CHAR* spec,
// we just leave it in place.
if (host_info->IsIPAddress()) {
output->set_length(output_begin);
- output->Append(canon_ip.data(), canon_ip.length());
+ output->Append(canon_ip.view());
}
} else {
// Canonicalization failed. Set BROKEN to notify the caller.
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index 574a5ed0d..5e1bcdd0e 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -281,11 +281,6 @@ const unsigned char kSharedCharTypeTable[0x100] = {
};
// clang-format on
-const char kHexCharLookup[0x10] = {
- '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
-};
-
const char kCharToHexLookup[8] = {
0, // 0x00 - 0x1f
'0', // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 0f1400d56..c92873551 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -13,8 +13,11 @@
#include <stddef.h>
#include <stdlib.h>
+#include <string>
+
#include "base/component_export.h"
#include "base/notreached.h"
+#include "base/strings/string_number_conversions.h"
#include "base/third_party/icu/icu_utf.h"
#include "url/url_canon.h"
@@ -86,10 +89,6 @@ void AppendStringOfType(const char16_t* source,
SharedCharTypes type,
CanonOutput* output);
-// Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
-// that will be used to represent it.
-COMPONENT_EXPORT(URL) extern const char kHexCharLookup[0x10];
-
// This lookup table allows fast conversion between ASCII hex letters and their
// corresponding numerical value. The 8-bit range is divided up into 8
// regions of 0x20 characters each. Each of the three character types (numbers,
@@ -135,8 +134,10 @@ char CanonicalSchemeChar(char16_t ch);
template <typename UINCHAR, typename OUTCHAR>
inline void AppendEscapedChar(UINCHAR ch, CanonOutputT<OUTCHAR>* output) {
output->push_back('%');
- output->push_back(static_cast<OUTCHAR>(kHexCharLookup[(ch >> 4) & 0xf]));
- output->push_back(static_cast<OUTCHAR>(kHexCharLookup[ch & 0xf]));
+ std::string hex;
+ base::AppendHexEncodedByte(static_cast<uint8_t>(ch), hex);
+ output->push_back(static_cast<OUTCHAR>(hex[0]));
+ output->push_back(static_cast<OUTCHAR>(hex[1]));
}
// The character we'll substitute for undecodable or invalid characters.
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc
index 783ddccf8..f79a03ace 100644
--- a/url/url_canon_ip.cc
+++ b/url/url_canon_ip.cc
@@ -504,9 +504,7 @@ bool DoIPv6AddressToNumber(const CHAR* spec,
&num_ipv4_components)) {
return false;
}
- if ((num_ipv4_components != 4 || trailing_dot) &&
- base::FeatureList::IsEnabled(
- url::kStrictIPv4EmbeddedIPv6AddressParsing)) {
+ if ((num_ipv4_components != 4 || trailing_dot)) {
return false;
}
}
diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc
index e48b6422f..cbd4bb48d 100644
--- a/url/url_canon_mailtourl.cc
+++ b/url/url_canon_mailtourl.cc
@@ -45,7 +45,7 @@ bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
- output->Append("mailto:", 7);
+ output->Append("mailto:");
new_parsed->scheme.len = 6;
bool success = true;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 2cd84b409..a2add6352 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -6,10 +6,10 @@
#include "base/check.h"
#include "base/check_op.h"
-#include "base/metrics/histogram_functions.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_features.h"
#include "url/url_parse_internal.h"
namespace url {
@@ -17,9 +17,9 @@ namespace url {
namespace {
enum CharacterFlags {
- // Pass through unchanged, whether escaped or unescaped. This doesn't
+ // Pass through unchanged, whether escaped or not. This doesn't
// actually set anything so you can't OR it to check, it's just to make the
- // table below more clear when neither ESCAPE or UNESCAPE is set.
+ // table below more clear when no other flag is set.
PASS = 0,
// This character requires special handling in DoPartialPathInternal. Doing
@@ -33,11 +33,6 @@ enum CharacterFlags {
// for this is triggered. Not valid with PASS or ESCAPE
ESCAPE_BIT = 2,
ESCAPE = ESCAPE_BIT | SPECIAL,
-
- // This character must be unescaped in canonical output. Not valid with
- // ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these
- // characters unescaped, they should just be copied.
- UNESCAPE = 4,
};
// This table contains one of the above flag values. Note some flags are more
@@ -50,8 +45,9 @@ enum CharacterFlags {
// to comply with the URL Standard.
//
// Dot is even more special, and the escaped version is handled specially by
-// IsDot. Therefore, we don't need the "escape" flag, and even the "unescape"
-// bit is never handled (we just need the "special") bit.
+// IsDot. Therefore, we don't need the "escape" flag. We just need the "special"
+// bit.
+//
// clang-format off
const unsigned char kPathCharLookup[0x100] = {
// NULL control chars...
@@ -59,17 +55,17 @@ const unsigned char kPathCharLookup[0x100] = {
// control chars...
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
// ' ' ! " # $ % & ' ( ) * + , - . /
- ESCAPE, PASS, ESCAPE, ESCAPE, PASS, ESCAPE, PASS, PASS, PASS, PASS, PASS, PASS, PASS, UNESCAPE,SPECIAL, PASS,
+ ESCAPE, PASS, ESCAPE, ESCAPE, PASS, ESCAPE, PASS, PASS, PASS, PASS, PASS, PASS, PASS, PASS ,SPECIAL, PASS,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
- UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS, PASS, ESCAPE, PASS, ESCAPE, ESCAPE,
+ PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS, PASS, ESCAPE, PASS, ESCAPE, ESCAPE,
// @ A B C D E F G H I J K L M N O
- PASS, UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
+ PASS, PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,
// P Q R S T U V W X Y Z [ \ ] ^ _
- UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS, ESCAPE, PASS, ESCAPE, UNESCAPE,
+ PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS, ESCAPE, PASS, ESCAPE, PASS ,
// ` a b c d e f g h i j k l m n o
- ESCAPE, UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
+ ESCAPE, PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,
// p q r s t u v w x y z { | } ~ <NBSP>
- UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,ESCAPE, ESCAPE, ESCAPE, UNESCAPE,ESCAPE,
+ PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,ESCAPE, ESCAPE, ESCAPE, PASS ,ESCAPE,
// ...all the high-bit characters are escaped
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
@@ -170,76 +166,6 @@ void BackUpToPreviousSlash(size_t path_begin_in_output, CanonOutput* output) {
output->set_length(i + 1);
}
-// Looks for problematic nested escape sequences and escapes the output as
-// needed to ensure they can't be misinterpreted.
-//
-// Our concern is that in input escape sequence that's invalid because it
-// contains nested escape sequences might look valid once those are unescaped.
-// For example, "%%300" is not a valid escape sequence, but after unescaping the
-// inner "%30" this becomes "%00" which is valid. Leaving this in the output
-// string can result in callers re-canonicalizing the string and unescaping this
-// sequence, thus resulting in something fundamentally different than the
-// original input here. This can cause a variety of problems.
-//
-// This function is called after we've just unescaped a sequence that's within
-// two output characters of a previous '%' that we know didn't begin a valid
-// escape sequence in the input string. We look for whether the output is going
-// to turn into a valid escape sequence, and if so, convert the initial '%' into
-// an escaped "%25" so the output can't be misinterpreted.
-//
-// |spec| is the input string we're canonicalizing.
-// |next_input_index| is the index of the next unprocessed character in |spec|.
-// |input_len| is the length of |spec|.
-// |last_invalid_percent_index| is the index in |output| of a previously-seen
-// '%' character. The caller knows this '%' character isn't followed by a valid
-// escape sequence in the input string.
-// |output| is the canonicalized output thus far. The caller guarantees this
-// ends with a '%' followed by one or two characters, and the '%' is the one
-// pointed to by |last_invalid_percent_index|. The last character in the string
-// was just unescaped.
-template <typename CHAR>
-void CheckForNestedEscapes(const CHAR* spec,
- size_t next_input_index,
- size_t input_len,
- size_t last_invalid_percent_index,
- CanonOutput* output) {
- const size_t length = output->length();
- const char last_unescaped_char = output->at(length - 1);
-
- // If |output| currently looks like "%c", we need to try appending the next
- // input character to see if this will result in a problematic escape
- // sequence. Note that this won't trigger on the first nested escape of a
- // two-escape sequence like "%%30%30" -- we'll allow the conversion to
- // "%0%30" -- but the second nested escape will be caught by this function
- // when it's called again in that case.
- const bool append_next_char = last_invalid_percent_index == length - 2;
- if (append_next_char) {
- // If the input doesn't contain a 7-bit character next, this case won't be a
- // problem.
- if ((next_input_index == input_len) || (spec[next_input_index] >= 0x80))
- return;
- output->push_back(static_cast<char>(spec[next_input_index]));
- }
-
- // Now output ends like "%cc". Try to unescape this.
- size_t begin = last_invalid_percent_index;
- unsigned char temp;
- if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
- // New escape sequence found. Overwrite the characters following the '%'
- // with "25", and push_back() the one or two characters that were following
- // the '%' when we were called.
- if (!append_next_char)
- output->push_back(output->at(last_invalid_percent_index + 1));
- output->set(last_invalid_percent_index + 1, '2');
- output->set(last_invalid_percent_index + 2, '5');
- output->push_back(last_unescaped_char);
- } else if (append_next_char) {
- // Not a valid escape sequence, but we still need to undo appending the next
- // source character so the caller can process it normally.
- output->set_length(length);
- }
-}
-
// Canonicalizes and appends the given path to the output. It assumes that if
// the input path starts with a slash, it should be copied to the output.
//
@@ -261,13 +187,7 @@ bool DoPartialPathInternal(const CHAR* spec,
size_t end = static_cast<size_t>(path.end());
- // We use this variable to minimize the amount of work done when unescaping --
- // we'll only call CheckForNestedEscapes() when this points at one of the last
- // couple of characters in |output|.
- absl::optional<size_t> last_invalid_percent_index;
-
bool success = true;
- bool unescape_escaped_char = false;
for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (sizeof(CHAR) > 1 && uch >= 0x80) {
@@ -307,9 +227,6 @@ bool DoPartialPathInternal(const CHAR* spec,
break;
case DIRECTORY_UP:
BackUpToPreviousSlash(path_begin_in_output, output);
- if (last_invalid_percent_index >= output->length()) {
- last_invalid_percent_index = absl::nullopt;
- }
i += dotlen + consumed_len - 1;
break;
}
@@ -326,46 +243,18 @@ bool DoPartialPathInternal(const CHAR* spec,
} else if (out_ch == '%') {
// Handle escape sequences.
- unsigned char unescaped_value;
- if (DecodeEscaped(spec, &i, end, &unescaped_value)) {
- // Valid escape sequence, see if we keep, reject, or unescape it.
- // Note that at this point DecodeEscape() will have advanced |i| to
- // the last character of the escape sequence.
- char unescaped_flags = kPathCharLookup[unescaped_value];
-
- if (unescaped_flags & UNESCAPE) {
- // This escaped value shouldn't be escaped. Try to copy it.
- unescape_escaped_char = true;
-
- output->push_back(unescaped_value);
- // If we just unescaped a value within 2 output characters of the
- // '%' from a previously-detected invalid escape sequence, we
- // might have an input string with problematic nested escape
- // sequences; detect and fix them.
- if (last_invalid_percent_index.has_value() &&
- ((last_invalid_percent_index.value() + 3) >=
- output->length())) {
- CheckForNestedEscapes(spec, i + 1, end,
- last_invalid_percent_index.value(),
- output);
- }
- } else {
- // Either this is an invalid escaped character, or it's a valid
- // escaped character we should keep escaped. In the first case we
- // should just copy it exactly and remember the error. In the
- // second we also copy exactly in case the server is sensitive to
- // changing the case of any hex letters.
- output->push_back('%');
- output->push_back(static_cast<char>(spec[i - 1]));
- output->push_back(static_cast<char>(spec[i]));
- }
+ unsigned char unused_unescaped_value;
+ if (DecodeEscaped(spec, &i, end, &unused_unescaped_value)) {
+ // Valid escape sequence. We should just copy it exactly.
+ output->push_back('%');
+ output->push_back(static_cast<char>(spec[i - 1]));
+ output->push_back(static_cast<char>(spec[i]));
} else {
// Invalid escape sequence. IE7+ rejects any URLs with such
// sequences, while other browsers pass them through unchanged. We
// use the permissive behavior.
// TODO(brettw): Consider testing IE's strict behavior, which would
// allow removing the code to handle nested escapes above.
- last_invalid_percent_index = output->length();
output->push_back('%');
}
} else if (flags & ESCAPE_BIT) {
@@ -378,8 +267,6 @@ bool DoPartialPathInternal(const CHAR* spec,
}
}
}
- base::UmaHistogramBoolean("URL.Path.UnescapeEscapedChar",
- unescape_escaped_char);
return success;
}
diff --git a/url/url_canon_stdstring.h b/url/url_canon_stdstring.h
index 528f91f2f..542860a04 100644
--- a/url/url_canon_stdstring.h
+++ b/url/url_canon_stdstring.h
@@ -10,11 +10,11 @@
// we have segregated it here.
#include <string>
+#include <string_view>
#include "base/compiler_specific.h"
#include "base/component_export.h"
#include "base/memory/raw_ptr_exclusion.h"
-#include "base/strings/string_piece.h"
#include "url/url_canon.h"
namespace url {
@@ -55,64 +55,64 @@ class COMPONENT_EXPORT(URL) StdStringCanonOutput : public CanonOutput {
};
// An extension of the Replacements class that allows the setters to use
-// StringPieces (implicitly allowing strings or char*s).
+// string_views (implicitly allowing strings or char*s).
//
-// The contents of the StringPieces are not copied and must remain valid until
-// the StringPieceReplacements object goes out of scope.
+// The contents of the string_views are not copied and must remain valid until
+// the StringViewReplacements object goes out of scope.
//
// In order to make it harder to misuse the API the setters do not accept rvalue
// references to std::strings.
// Note: Extra const char* overloads are necessary to break ambiguities that
// would otherwise exist for char literals.
template <typename CharT>
-class StringPieceReplacements : public Replacements<CharT> {
+class StringViewReplacements : public Replacements<CharT> {
private:
using StringT = std::basic_string<CharT>;
- using StringPieceT = base::BasicStringPiece<CharT>;
+ using StringViewT = std::basic_string_view<CharT>;
using ParentT = Replacements<CharT>;
using SetterFun = void (ParentT::*)(const CharT*, const Component&);
- void SetImpl(SetterFun fun, StringPieceT str) {
+ void SetImpl(SetterFun fun, StringViewT str) {
(this->*fun)(str.data(), Component(0, static_cast<int>(str.size())));
}
public:
void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); }
- void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); }
+ void SetSchemeStr(StringViewT str) { SetImpl(&ParentT::SetScheme, str); }
void SetSchemeStr(const StringT&&) = delete;
void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); }
- void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); }
+ void SetUsernameStr(StringViewT str) { SetImpl(&ParentT::SetUsername, str); }
void SetUsernameStr(const StringT&&) = delete;
using ParentT::ClearUsername;
void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); }
- void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); }
+ void SetPasswordStr(StringViewT str) { SetImpl(&ParentT::SetPassword, str); }
void SetPasswordStr(const StringT&&) = delete;
using ParentT::ClearPassword;
void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); }
- void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); }
+ void SetHostStr(StringViewT str) { SetImpl(&ParentT::SetHost, str); }
void SetHostStr(const StringT&&) = delete;
using ParentT::ClearHost;
void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); }
- void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); }
+ void SetPortStr(StringViewT str) { SetImpl(&ParentT::SetPort, str); }
void SetPortStr(const StringT&&) = delete;
using ParentT::ClearPort;
void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); }
- void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); }
+ void SetPathStr(StringViewT str) { SetImpl(&ParentT::SetPath, str); }
void SetPathStr(const StringT&&) = delete;
using ParentT::ClearPath;
void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); }
- void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); }
+ void SetQueryStr(StringViewT str) { SetImpl(&ParentT::SetQuery, str); }
void SetQueryStr(const StringT&&) = delete;
using ParentT::ClearQuery;
void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); }
- void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); }
+ void SetRefStr(StringViewT str) { SetImpl(&ParentT::SetRef, str); }
void SetRefStr(const StringT&&) = delete;
using ParentT::ClearRef;
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 0fe0cb34f..6e48a8b88 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -6,11 +6,11 @@
#include <errno.h>
#include <stddef.h>
+#include <string_view>
-#include "base/strings/string_piece.h"
+#include "base/strings/string_number_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/gtest_util.h"
-#include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_feature_list.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
@@ -55,17 +55,6 @@ struct IPAddressCase {
const char* expected_address_hex; // Two hex chars per IP address byte.
};
-std::string BytesToHexString(unsigned char bytes[16], int length) {
- EXPECT_TRUE(length == 0 || length == 4 || length == 16)
- << "Bad IP address length: " << length;
- std::string result;
- for (int i = 0; i < length; ++i) {
- result.push_back(kHexCharLookup[(bytes[i] >> 4) & 0xf]);
- result.push_back(kHexCharLookup[bytes[i] & 0xf]);
- }
- return result;
-}
-
struct ReplaceCase {
const char* base;
const char* scheme;
@@ -119,12 +108,12 @@ TEST(URLCanonTest, DoAppendUTF8) {
{0x10FFFF, "\xF4\x8F\xBF\xBF"},
};
std::string out_str;
- for (size_t i = 0; i < std::size(utf_cases); i++) {
+ for (const auto& utf_case : utf_cases) {
out_str.clear();
StdStringCanonOutput output(&out_str);
- AppendUTF8Value(utf_cases[i].input, &output);
+ AppendUTF8Value(utf_case.input, &output);
output.Complete();
- EXPECT_EQ(utf_cases[i].output, out_str);
+ EXPECT_EQ(utf_case.output, out_str);
}
}
@@ -171,27 +160,27 @@ TEST(URLCanonTest, UTF) {
};
std::string out_str;
- for (size_t i = 0; i < std::size(utf_cases); i++) {
- if (utf_cases[i].input8) {
+ for (const auto& utf_case : utf_cases) {
+ if (utf_case.input8) {
out_str.clear();
StdStringCanonOutput output(&out_str);
- size_t input_len = strlen(utf_cases[i].input8);
+ size_t input_len = strlen(utf_case.input8);
bool success = true;
for (size_t ch = 0; ch < input_len; ch++) {
- success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
- &output);
+ success &=
+ AppendUTF8EscapedChar(utf_case.input8, &ch, input_len, &output);
}
output.Complete();
- EXPECT_EQ(utf_cases[i].expected_success, success);
- EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+ EXPECT_EQ(utf_case.expected_success, success);
+ EXPECT_EQ(utf_case.output, out_str);
}
- if (utf_cases[i].input16) {
+ if (utf_case.input16) {
out_str.clear();
StdStringCanonOutput output(&out_str);
std::u16string input_str(
- test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(utf_case.input16));
size_t input_len = input_str.length();
bool success = true;
for (size_t ch = 0; ch < input_len; ch++) {
@@ -199,18 +188,17 @@ TEST(URLCanonTest, UTF) {
&output);
}
output.Complete();
- EXPECT_EQ(utf_cases[i].expected_success, success);
- EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+ EXPECT_EQ(utf_case.expected_success, success);
+ EXPECT_EQ(utf_case.output, out_str);
}
- if (utf_cases[i].input8 && utf_cases[i].input16 &&
- utf_cases[i].expected_success) {
+ if (utf_case.input8 && utf_case.input16 && utf_case.expected_success) {
// Check that the UTF-8 and UTF-16 inputs are equivalent.
// UTF-16 -> UTF-8
- std::string input8_str(utf_cases[i].input8);
+ std::string input8_str(utf_case.input8);
std::u16string input16_str(
- test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(utf_case.input16));
EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str));
// UTF-8 -> UTF-16
@@ -242,36 +230,36 @@ TEST(URLCanonTest, Scheme) {
std::string out_str;
- for (size_t i = 0; i < std::size(scheme_cases); i++) {
- int url_len = static_cast<int>(strlen(scheme_cases[i].input));
+ for (const auto& scheme_case : scheme_cases) {
+ int url_len = static_cast<int>(strlen(scheme_case.input));
Component in_comp(0, url_len);
Component out_comp;
out_str.clear();
StdStringCanonOutput output1(&out_str);
- bool success = CanonicalizeScheme(scheme_cases[i].input, in_comp, &output1,
- &out_comp);
+ bool success =
+ CanonicalizeScheme(scheme_case.input, in_comp, &output1, &out_comp);
output1.Complete();
- EXPECT_EQ(scheme_cases[i].expected_success, success);
- EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
- EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(scheme_case.expected_success, success);
+ EXPECT_EQ(scheme_case.expected, out_str);
+ EXPECT_EQ(scheme_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(scheme_case.expected_component.len, out_comp.len);
// Now try the wide version.
out_str.clear();
StdStringCanonOutput output2(&out_str);
- std::u16string wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(scheme_case.input));
in_comp.len = static_cast<int>(wide_input.length());
success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
&out_comp);
output2.Complete();
- EXPECT_EQ(scheme_cases[i].expected_success, success);
- EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
- EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(scheme_case.expected_success, success);
+ EXPECT_EQ(scheme_case.expected, out_str);
+ EXPECT_EQ(scheme_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(scheme_case.expected_component.len, out_comp.len);
}
// Test the case where the scheme is declared nonexistent, it should be
@@ -283,7 +271,7 @@ TEST(URLCanonTest, Scheme) {
EXPECT_FALSE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
output.Complete();
- EXPECT_EQ(std::string(":"), out_str);
+ EXPECT_EQ(":", out_str);
EXPECT_EQ(0, out_comp.begin);
EXPECT_EQ(0, out_comp.len);
}
@@ -315,23 +303,29 @@ INSTANTIATE_TEST_SUITE_P(All,
TEST_P(URLCanonHostTest, Host) {
bool use_idna_non_transitional = IsUsingIDNA2008NonTransitional();
+ // clang-format off
IPAddressCase host_cases[] = {
// Basic canonicalization, uppercase should be converted to lowercase.
{"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10),
CanonHostInfo::NEUTRAL, -1, ""},
- // Spaces and some other characters should be escaped.
- {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com",
- Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
+ // TODO(https://crbug.com/1416013): Update the test after SPACE is
+ // correctly handled.
+ {"Goo%20 goo.com", L"Goo%20 goo.com", "goo%20%20goo.com",
+ Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
+ // TODO(https://crbug.com/1416013): Update the test after ASTERISK is
+ // correctly handled.
+ {"Goo%2a*goo.com", L"Goo%2a*goo.com", "goo%2A%2Agoo.com",
+ Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
// Exciting different types of spaces!
- {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
+ {nullptr, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
CanonHostInfo::NEUTRAL, -1, ""},
// Other types of space (no-break, zero-width, zero-width-no-break) are
// name-prepped away to nothing.
- {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
+ {nullptr, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
CanonHostInfo::NEUTRAL, -1, ""},
// Ideographic full stop (full-width period for Chinese, etc.) should be
// treated as a dot.
- {NULL,
+ {nullptr,
L"www.foo\x3002"
L"bar.com",
"www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
@@ -437,7 +431,7 @@ TEST_P(URLCanonHostTest, Host) {
// Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
// However, we do allow this at the moment because we don't use
// STD3 rules and canonicalize full-width ASCII to ASCII.
- {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", Component(0, 6),
+ {"wow\xef\xbc\x81", L"wow\xff01", "wow!", Component(0, 4),
CanonHostInfo::NEUTRAL, -1, ""},
// U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
// Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
@@ -575,7 +569,7 @@ TEST_P(URLCanonHostTest, Host) {
// before punycode string was created. I.e.
// if '(' is escaped after punycode is created we would get xn--%28-8tb
// (incorrect).
- {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
+ {"\xd1\x82(", L"\x0442(", "xn--(-8tb", Component(0, 9),
CanonHostInfo::NEUTRAL, -1, ""},
// Address with all hexadecimal characters with leading number of 1<<32
// or greater and should return NEUTRAL rather than BROKEN if not all
@@ -597,37 +591,38 @@ TEST_P(URLCanonHostTest, Host) {
{"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen",
Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
};
+ // clang-format on
// CanonicalizeHost() non-verbose.
std::string out_str;
- for (size_t i = 0; i < std::size(host_cases); i++) {
+ for (const auto& host_case : host_cases) {
// Narrow version.
- if (host_cases[i].input8) {
- int host_len = static_cast<int>(strlen(host_cases[i].input8));
+ if (host_case.input8) {
+ int host_len = static_cast<int>(strlen(host_case.input8));
Component in_comp(0, host_len);
Component out_comp;
out_str.clear();
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizeHost(host_cases[i].input8, in_comp, &output,
- &out_comp);
+ bool success =
+ CanonicalizeHost(host_case.input8, in_comp, &output, &out_comp);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
- success) << "for input: " << host_cases[i].input8;
- EXPECT_EQ(std::string(host_cases[i].expected), out_str) <<
- "for input: " << host_cases[i].input8;
- EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin) <<
- "for input: " << host_cases[i].input8;
- EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len) <<
- "for input: " << host_cases[i].input8;
+ EXPECT_EQ(host_case.expected_family != CanonHostInfo::BROKEN, success)
+ << "for input: " << host_case.input8;
+ EXPECT_EQ(host_case.expected, out_str)
+ << "for input: " << host_case.input8;
+ EXPECT_EQ(host_case.expected_component.begin, out_comp.begin)
+ << "for input: " << host_case.input8;
+ EXPECT_EQ(host_case.expected_component.len, out_comp.len)
+ << "for input: " << host_case.input8;
}
// Wide version.
- if (host_cases[i].input16) {
+ if (host_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(host_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(host_case.input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
Component out_comp;
@@ -639,46 +634,45 @@ TEST_P(URLCanonHostTest, Host) {
&out_comp);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
- success);
- EXPECT_EQ(std::string(host_cases[i].expected), out_str);
- EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(host_case.expected_family != CanonHostInfo::BROKEN, success);
+ EXPECT_EQ(host_case.expected, out_str);
+ EXPECT_EQ(host_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(host_case.expected_component.len, out_comp.len);
}
}
// CanonicalizeHostVerbose()
- for (size_t i = 0; i < std::size(host_cases); i++) {
+ for (const auto& host_case : host_cases) {
// Narrow version.
- if (host_cases[i].input8) {
- int host_len = static_cast<int>(strlen(host_cases[i].input8));
+ if (host_case.input8) {
+ int host_len = static_cast<int>(strlen(host_case.input8));
Component in_comp(0, host_len);
out_str.clear();
StdStringCanonOutput output(&out_str);
CanonHostInfo host_info;
- CanonicalizeHostVerbose(host_cases[i].input8, in_comp, &output,
- &host_info);
+ CanonicalizeHostVerbose(host_case.input8, in_comp, &output, &host_info);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(host_cases[i].expected), out_str);
- EXPECT_EQ(host_cases[i].expected_component.begin,
- host_info.out_host.begin);
- EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
- BytesToHexString(host_info.address, host_info.AddressLength()));
- if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
- EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+ EXPECT_EQ(host_case.expected_family, host_info.family);
+ EXPECT_EQ(host_case.expected, out_str);
+ EXPECT_EQ(host_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(host_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(
+ host_case.expected_address_hex,
+ base::HexEncode(host_info.address,
+ static_cast<size_t>(host_info.AddressLength())));
+ if (host_case.expected_family == CanonHostInfo::IPV4) {
+ EXPECT_EQ(host_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
// Wide version.
- if (host_cases[i].input16) {
+ if (host_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(host_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(host_case.input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -689,21 +683,73 @@ TEST_P(URLCanonHostTest, Host) {
CanonicalizeHostVerbose(input16.c_str(), in_comp, &output, &host_info);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(host_cases[i].expected), out_str);
- EXPECT_EQ(host_cases[i].expected_component.begin,
- host_info.out_host.begin);
- EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
- BytesToHexString(host_info.address, host_info.AddressLength()));
- if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
- EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+ EXPECT_EQ(host_case.expected_family, host_info.family);
+ EXPECT_EQ(host_case.expected, out_str);
+ EXPECT_EQ(host_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(host_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(
+ host_case.expected_address_hex,
+ base::HexEncode(host_info.address,
+ static_cast<size_t>(host_info.AddressLength())));
+ if (host_case.expected_family == CanonHostInfo::IPV4) {
+ EXPECT_EQ(host_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
}
}
+TEST(URLCanonTest, HostPuncutationChar) {
+ // '%' is not tested here because it is used for percent-escaping.
+ const std::string_view allowed_host_chars[] = {
+ "!", "\"", "$", "&", "'", "(", ")", "+", ",",
+ "-", ".", ";", "=", "_", "`", "{", "}", "~",
+ };
+
+ const std::string_view forbidden_host_chars[] = {
+ "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|",
+ };
+
+ // Characters which are escaped, although escaping them is not compliant with
+ // the standard. See https://crbug.com/1416013.
+ struct EscapedCharTestCase {
+ std::string_view input;
+ std::string_view expected;
+ } escaped_host_chars[] = {{" ", "%20"}, {"*", "%2A"}};
+
+ for (const std::string_view input : allowed_host_chars) {
+ std::string out_str;
+ Component in_comp(0, input.size());
+ Component out_comp;
+ StdStringCanonOutput output(&out_str);
+ bool success = CanonicalizeHost(input.data(), in_comp, &output, &out_comp);
+ EXPECT_TRUE(success) << "Input: " << input;
+ output.Complete();
+ EXPECT_EQ(out_str, input) << "Input: " << input;
+ }
+
+ for (const std::string_view input : forbidden_host_chars) {
+ std::string out_str;
+ Component in_comp(0, input.size());
+ Component out_comp;
+ StdStringCanonOutput output(&out_str);
+ EXPECT_FALSE(CanonicalizeHost(input.data(), in_comp, &output, &out_comp))
+ << "Input: " << input;
+ }
+
+ for (const auto& c : escaped_host_chars) {
+ std::string out_str;
+ Component in_comp(0, c.input.size());
+ Component out_comp;
+ StdStringCanonOutput output(&out_str);
+ bool success =
+ CanonicalizeHost(c.input.data(), in_comp, &output, &out_comp);
+ EXPECT_TRUE(success) << "Input: " << c.input;
+ output.Complete();
+ EXPECT_EQ(out_str, c.expected) << "Input: " << c.input;
+ }
+}
+
TEST(URLCanonTest, IPv4) {
// clang-format off
IPAddressCase cases[] = {
@@ -822,8 +868,9 @@ TEST(URLCanonTest, IPv4) {
output1.Complete();
EXPECT_EQ(test_case.expected_family, host_info.family);
- EXPECT_EQ(std::string(test_case.expected_address_hex),
- BytesToHexString(host_info.address, host_info.AddressLength()));
+ EXPECT_EQ(test_case.expected_address_hex,
+ base::HexEncode(host_info.address,
+ static_cast<size_t>(host_info.AddressLength())));
if (host_info.family == CanonHostInfo::IPV4) {
EXPECT_STREQ(test_case.expected, out_str1.c_str());
EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
@@ -843,8 +890,9 @@ TEST(URLCanonTest, IPv4) {
output2.Complete();
EXPECT_EQ(test_case.expected_family, host_info.family);
- EXPECT_EQ(std::string(test_case.expected_address_hex),
- BytesToHexString(host_info.address, host_info.AddressLength()));
+ EXPECT_EQ(test_case.expected_address_hex,
+ base::HexEncode(host_info.address,
+ static_cast<size_t>(host_info.AddressLength())));
if (host_info.family == CanonHostInfo::IPV4) {
EXPECT_STREQ(test_case.expected, out_str2.c_str());
EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
@@ -855,153 +903,162 @@ TEST(URLCanonTest, IPv4) {
}
}
-class URLCanonIPv6Test
- : public ::testing::Test,
- public ::testing::WithParamInterface<bool> {
- public:
- URLCanonIPv6Test() {
- if (GetParam()) {
- scoped_feature_list_.InitAndEnableFeature(kStrictIPv4EmbeddedIPv6AddressParsing);
- } else {
- scoped_feature_list_.InitAndDisableFeature(kStrictIPv4EmbeddedIPv6AddressParsing);
- }
- }
-
- private:
- base::test::ScopedFeatureList scoped_feature_list_;
-};
-
-INSTANTIATE_TEST_SUITE_P(All,
- URLCanonIPv6Test,
- ::testing::Bool());
-
-TEST_P(URLCanonIPv6Test, IPv6) {
- bool strict_ipv4_embedded_ipv6_parsing =
- base::FeatureList::IsEnabled(url::kStrictIPv4EmbeddedIPv6AddressParsing);
-
+TEST(URLCanonTest, IPv6) {
IPAddressCase cases[] = {
// Empty is not an IP address.
- {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Non-IPs with [:] characters are marked BROKEN.
- {":", L":", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[", L"[", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[:", L"[:", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"]", L"]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {":]", L":]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[]", L"[]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[:]", L"[:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {":", L":", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[", L"[", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[:", L"[:", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"]", L"]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {":]", L":]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[]", L"[]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[:]", L"[:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// Regular IP address is invalid without bounding '[' and ']'.
- {"2001:db8::1", L"2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[2001:db8::1", L"[2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"2001:db8::1]", L"2001:db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"2001:db8::1", L"2001:db8::1", "", Component(), CanonHostInfo::BROKEN,
+ -1, ""},
+ {"[2001:db8::1", L"[2001:db8::1", "", Component(), CanonHostInfo::BROKEN,
+ -1, ""},
+ {"2001:db8::1]", L"2001:db8::1]", "", Component(), CanonHostInfo::BROKEN,
+ -1, ""},
// Regular IP addresses.
- {"[::]", L"[::]", "[::]", Component(0,4), CanonHostInfo::IPV6, -1, "00000000000000000000000000000000"},
- {"[::1]", L"[::1]", "[::1]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000001"},
- {"[1::]", L"[1::]", "[1::]", Component(0,5), CanonHostInfo::IPV6, -1, "00010000000000000000000000000000"},
-
- // Leading zeros should be stripped.
- {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", Component(0,17), CanonHostInfo::IPV6, -1, "00000001000200030004000500060007"},
-
- // Upper case letters should be lowercased.
- {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", Component(0,20), CanonHostInfo::IPV6, -1, "000A000B000C00DE00FF0000000100AC"},
-
- // The same address can be written with different contractions, but should
- // get canonicalized to the same thing.
- {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
- {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
-
- // Addresses with embedded IPv4.
- {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", Component(0,10), CanonHostInfo::IPV6, -1, "000000000000000000000000C0A80001"},
- {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"},
- {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "[::eeee:c0a8:1]", Component(0, 15), CanonHostInfo::IPV6, -1, "00000000000000000000EEEEC0A80001"},
- {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]", Component(0, 14), CanonHostInfo::IPV6, -1, "200100000000000000000000C0A80001"},
- {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // IPv4 embedded IPv6 addresses
- {"[::ffff:192.1.2]",
- L"[::ffff:192.1.2]",
- "[::ffff:c001:2]",
- strict_ipv4_embedded_ipv6_parsing ? Component() : Component(0,15),
- strict_ipv4_embedded_ipv6_parsing ? CanonHostInfo::BROKEN : CanonHostInfo::IPV6,
- -1,
- (strict_ipv4_embedded_ipv6_parsing ? "" : "00000000000000000000FFFFC0010002")},
- {"[::ffff:192.1]",
- L"[::ffff:192.1]",
- "[::ffff:c000:1]",
- strict_ipv4_embedded_ipv6_parsing ? Component() : Component(0,15),
- strict_ipv4_embedded_ipv6_parsing ? CanonHostInfo::BROKEN : CanonHostInfo::IPV6,
- -1,
- (strict_ipv4_embedded_ipv6_parsing ? "" : "00000000000000000000FFFFC0000001")},
- {"[::ffff:192.1.2.3.4]",
- L"[::ffff:192.1.2.3.4]",
- "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // IPv4 using hex.
- // TODO(eroman): Should this format be disallowed?
- {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"},
-
- // There may be zeros surrounding the "::" contraction.
- {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000008"},
-
- {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
-
- // Can only have one "::" contraction in an IPv6 string literal.
- {"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // No more than 2 consecutive ':'s.
- {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // Non-IP addresses due to invalid characters.
- {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // If there are not enough components, the last one should fill them out.
- // ... omitted at this time ...
- // Too many components means not an IP address. Similarly, with too few
- // if using IPv4 compat or mapped addresses.
- {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // Too many bits (even though 8 comonents, the last one holds 32 bits).
- {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // Too many bits specified -- the contraction would have to be zero-length
- // to not exceed 128 bits.
- {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // The contraction is for 16 bits of zero.
- {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", Component(0,17), CanonHostInfo::IPV6, -1, "00010002000300040005000600000008"},
-
- // Cannot have a trailing colon.
- {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // Cannot have negative numbers.
- {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // Scope ID -- the URL may contain an optional ["%" <scope_id>] section.
- // The scope_id should be included in the canonicalized URL, and is an
- // unsigned decimal number.
-
- // Invalid because no ID was given after the percent.
-
- // Don't allow scope-id
- {"[1::%1]", L"[1::%1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[1::%eth0]", L"[1::%eth0]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[1::%]", L"[1::%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[%]", L"[%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[::%:]", L"[::%:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-
- // Don't allow leading or trailing colons.
- {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[::]", L"[::]", "[::]", Component(0, 4), CanonHostInfo::IPV6, -1,
+ "00000000000000000000000000000000"},
+ {"[::1]", L"[::1]", "[::1]", Component(0, 5), CanonHostInfo::IPV6, -1,
+ "00000000000000000000000000000001"},
+ {"[1::]", L"[1::]", "[1::]", Component(0, 5), CanonHostInfo::IPV6, -1,
+ "00010000000000000000000000000000"},
+
+ // Leading zeros should be stripped.
+ {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]",
+ "[0:1:2:3:4:5:6:7]", Component(0, 17), CanonHostInfo::IPV6, -1,
+ "00000001000200030004000500060007"},
+
+ // Upper case letters should be lowercased.
+ {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]",
+ Component(0, 20), CanonHostInfo::IPV6, -1,
+ "000A000B000C00DE00FF0000000100AC"},
+
+ // The same address can be written with different contractions, but should
+ // get canonicalized to the same thing.
+ {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", Component(0, 14),
+ CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
+ {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", Component(0, 14),
+ CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
+
+ // Addresses with embedded IPv4.
+ {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", Component(0, 10),
+ CanonHostInfo::IPV6, -1, "000000000000000000000000C0A80001"},
+ {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]",
+ Component(0, 15), CanonHostInfo::IPV6, -1,
+ "00000000000000000000FFFFC0A80001"},
+ {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "[::eeee:c0a8:1]",
+ Component(0, 15), CanonHostInfo::IPV6, -1,
+ "00000000000000000000EEEEC0A80001"},
+ {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]",
+ Component(0, 14), CanonHostInfo::IPV6, -1,
+ "200100000000000000000000C0A80001"},
+ {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+
+ // IPv4 embedded IPv6 addresses
+ {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[::ffff:192.1]", L"[::ffff:192.1]", "[::ffff:c000:1]", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[::ffff:192.1.2.3.4]", L"[::ffff:192.1.2.3.4]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+
+ // IPv4 using hex.
+ // TODO(eroman): Should this format be disallowed?
+ {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]",
+ "[::ffff:c0a8:1]", Component(0, 15), CanonHostInfo::IPV6, -1,
+ "00000000000000000000FFFFC0A80001"},
+
+ // There may be zeros surrounding the "::" contraction.
+ {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", Component(0, 5),
+ CanonHostInfo::IPV6, -1, "00000000000000000000000000000008"},
+
+ {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0, 13),
+ CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
+
+ // Can only have one "::" contraction in an IPv6 string literal.
+ {"[2001::db8::1]", L"[2001::db8::1]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ // No more than 2 consecutive ':'s.
+ {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Non-IP addresses due to invalid characters.
+ {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN,
+ -1, ""},
+ // If there are not enough components, the last one should fill them out.
+ // ... omitted at this time ...
+ // Too many components means not an IP address. Similarly, with too few
+ // if using IPv4 compat or mapped addresses.
+ {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ // Too many bits (even though 8 components, the last one holds 32 bits).
+ {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "",
+ Component(), CanonHostInfo::BROKEN, -1, ""},
+
+ // Too many bits specified -- the contraction would have to be zero-length
+ // to not exceed 128 bits.
+ {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "",
+ Component(), CanonHostInfo::BROKEN, -1, ""},
+
+ // The contraction is for 16 bits of zero.
+ {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]",
+ Component(0, 17), CanonHostInfo::IPV6, -1,
+ "00010002000300040005000600000008"},
+
+ // Cannot have a trailing colon.
+ {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "",
+ Component(), CanonHostInfo::BROKEN, -1, ""},
+
+ // Cannot have negative numbers.
+ {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+
+ // Scope ID -- the URL may contain an optional ["%" <scope_id>] section.
+ // The scope_id should be included in the canonicalized URL, and is an
+ // unsigned decimal number.
+
+ // Invalid because no ID was given after the percent.
+
+ // Don't allow scope-id
+ {"[1::%1]", L"[1::%1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[1::%eth0]", L"[1::%eth0]", "", Component(), CanonHostInfo::BROKEN, -1,
+ ""},
+ {"[1::%]", L"[1::%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[%]", L"[%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[::%:]", L"[::%:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+ // Don't allow leading or trailing colons.
+ {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
// We allow a single trailing dot.
- // ... omitted at this time ...
+ // ... omitted at this time ...
// Two dots in a row means not an IP address.
- {"[::192.168..1]", L"[::192.168..1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[::192.168..1]", L"[::192.168..1]", "", Component(),
+ CanonHostInfo::BROKEN, -1, ""},
// Any non-first components get truncated to one byte.
- // ... omitted at this time ...
+ // ... omitted at this time ...
// Spaces should be rejected.
- {"[::1 hello]", L"[::1 hello]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"[::1 hello]", L"[::1 hello]", "", Component(), CanonHostInfo::BROKEN,
+ -1, ""},
};
for (size_t i = 0; i < std::size(cases); i++) {
@@ -1015,8 +1072,10 @@ TEST_P(URLCanonIPv6Test, IPv6) {
output1.Complete();
EXPECT_EQ(cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(cases[i].expected_address_hex),
- BytesToHexString(host_info.address, host_info.AddressLength())) << "iter " << i << " host " << cases[i].input8;
+ EXPECT_EQ(cases[i].expected_address_hex,
+ base::HexEncode(host_info.address,
+ static_cast<size_t>(host_info.AddressLength())))
+ << "iter " << i << " host " << cases[i].input8;
if (host_info.family == CanonHostInfo::IPV6) {
EXPECT_STREQ(cases[i].expected, out_str1.c_str());
EXPECT_EQ(cases[i].expected_component.begin,
@@ -1035,8 +1094,9 @@ TEST_P(URLCanonIPv6Test, IPv6) {
output2.Complete();
EXPECT_EQ(cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(cases[i].expected_address_hex),
- BytesToHexString(host_info.address, host_info.AddressLength()));
+ EXPECT_EQ(cases[i].expected_address_hex,
+ base::HexEncode(host_info.address,
+ static_cast<size_t>(host_info.AddressLength())));
if (host_info.family == CanonHostInfo::IPV6) {
EXPECT_STREQ(cases[i].expected, out_str2.c_str());
EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
@@ -1130,34 +1190,30 @@ TEST(URLCanonTest, UserInfo) {
{"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
};
- for (size_t i = 0; i < std::size(user_info_cases); i++) {
- int url_len = static_cast<int>(strlen(user_info_cases[i].input));
+ for (const auto& user_info_case : user_info_cases) {
+ int url_len = static_cast<int>(strlen(user_info_case.input));
Parsed parsed;
- ParseStandardURL(user_info_cases[i].input, url_len, &parsed);
+ ParseStandardURL(user_info_case.input, url_len, &parsed);
Component out_user, out_pass;
std::string out_str;
StdStringCanonOutput output1(&out_str);
- bool success = CanonicalizeUserInfo(user_info_cases[i].input,
- parsed.username,
- user_info_cases[i].input,
- parsed.password,
- &output1,
- &out_user,
- &out_pass);
+ bool success = CanonicalizeUserInfo(user_info_case.input, parsed.username,
+ user_info_case.input, parsed.password,
+ &output1, &out_user, &out_pass);
output1.Complete();
- EXPECT_EQ(user_info_cases[i].expected_success, success);
- EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
- EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
- EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
- EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
- EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
+ EXPECT_EQ(user_info_case.expected_success, success);
+ EXPECT_EQ(user_info_case.expected, out_str);
+ EXPECT_EQ(user_info_case.expected_username.begin, out_user.begin);
+ EXPECT_EQ(user_info_case.expected_username.len, out_user.len);
+ EXPECT_EQ(user_info_case.expected_password.begin, out_pass.begin);
+ EXPECT_EQ(user_info_case.expected_password.len, out_pass.len);
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- std::u16string wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(user_info_case.input));
success = CanonicalizeUserInfo(wide_input.c_str(),
parsed.username,
wide_input.c_str(),
@@ -1167,12 +1223,12 @@ TEST(URLCanonTest, UserInfo) {
&out_pass);
output2.Complete();
- EXPECT_EQ(user_info_cases[i].expected_success, success);
- EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
- EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
- EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
- EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
- EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
+ EXPECT_EQ(user_info_case.expected_success, success);
+ EXPECT_EQ(user_info_case.expected, out_str);
+ EXPECT_EQ(user_info_case.expected_username.begin, out_user.begin);
+ EXPECT_EQ(user_info_case.expected_username.len, out_user.len);
+ EXPECT_EQ(user_info_case.expected_password.begin, out_pass.begin);
+ EXPECT_EQ(user_info_case.expected_password.len, out_pass.len);
}
}
@@ -1199,39 +1255,33 @@ TEST(URLCanonTest, Port) {
{"80", PORT_UNSPECIFIED, ":80", Component(1, 2), true},
};
- for (size_t i = 0; i < std::size(port_cases); i++) {
- int url_len = static_cast<int>(strlen(port_cases[i].input));
+ for (const auto& port_case : port_cases) {
+ int url_len = static_cast<int>(strlen(port_case.input));
Component in_comp(0, url_len);
Component out_comp;
std::string out_str;
StdStringCanonOutput output1(&out_str);
- bool success = CanonicalizePort(port_cases[i].input,
- in_comp,
- port_cases[i].default_port,
- &output1,
- &out_comp);
+ bool success = CanonicalizePort(
+ port_case.input, in_comp, port_case.default_port, &output1, &out_comp);
output1.Complete();
- EXPECT_EQ(port_cases[i].expected_success, success);
- EXPECT_EQ(std::string(port_cases[i].expected), out_str);
- EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(port_case.expected_success, success);
+ EXPECT_EQ(port_case.expected, out_str);
+ EXPECT_EQ(port_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(port_case.expected_component.len, out_comp.len);
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- std::u16string wide_input(base::UTF8ToUTF16(port_cases[i].input));
- success = CanonicalizePort(wide_input.c_str(),
- in_comp,
- port_cases[i].default_port,
- &output2,
- &out_comp);
+ std::u16string wide_input(base::UTF8ToUTF16(port_case.input));
+ success = CanonicalizePort(wide_input.c_str(), in_comp,
+ port_case.default_port, &output2, &out_comp);
output2.Complete();
- EXPECT_EQ(port_cases[i].expected_success, success);
- EXPECT_EQ(std::string(port_cases[i].expected), out_str);
- EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(port_case.expected_success, success);
+ EXPECT_EQ(port_case.expected, out_str);
+ EXPECT_EQ(port_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(port_case.expected_component.len, out_comp.len);
}
}
@@ -1275,13 +1325,13 @@ DualComponentCase kCommonPathCases[] = {
{"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true},
{nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22),
true},
- // Regular characters that are escaped should be unescaped
- {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
+ // Regular characters that are escaped should remain escaped
+ {"/foo%41%7a", L"/foo%41%7a", "/foo%41%7a", Component(0, 10), true},
// Funny characters that are unescaped should be escaped
{"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
{nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
// %00 should not cause failures.
- {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), true},
+ {"/foo%00%51", L"/foo%00%51", "/foo%00%51", Component(0, 10), true},
// Some characters should be passed through unchanged regardless of esc.
{"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
true},
@@ -1302,21 +1352,20 @@ DualComponentCase kCommonPathCases[] = {
"/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
// @ should be passed through unchanged (escaped or unescaped).
{"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
- // Nested escape sequences should result in escaping the leading '%' if
- // unescaping would result in a new escape sequence.
- {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
- {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
- {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
+ // Nested escape sequences no longer happen. See https://crbug.com/1252531.
+ {"/%A%42", L"/%A%42", "/%A%42", Component(0, 6), true},
+ {"/%%41B", L"/%%41B", "/%%41B", Component(0, 6), true},
+ {"/%%41%42", L"/%%41%42", "/%%41%42", Component(0, 8), true},
// Make sure truncated "nested" escapes don't result in reading off the
// string end.
- {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
+ {"/%%41", L"/%%41", "/%%41", Component(0, 5), true},
// Don't unescape the leading '%' if unescaping doesn't result in a valid
// new escape sequence.
- {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
- {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
+ {"/%%470", L"/%%470", "/%%470", Component(0, 6), true},
+ {"/%%2D%41", L"/%%2D%41", "/%%2D%41", Component(0, 8), true},
// Don't erroneously downcast a UTF-16 character in a way that makes it
// look like part of an escape sequence.
- {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
+ {nullptr, L"/%%41\x0130", "/%%41%C4%B0", Component(0, 11), true},
// ----- encoding tests -----
// Basic conversions
@@ -1438,34 +1487,34 @@ TEST(URLCanonTest, Query) {
{"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
};
- for (size_t i = 0; i < std::size(query_cases); i++) {
+ for (const auto& query_case : query_cases) {
Component out_comp;
- if (query_cases[i].input8) {
- int len = static_cast<int>(strlen(query_cases[i].input8));
+ if (query_case.input8) {
+ int len = static_cast<int>(strlen(query_case.input8));
Component in_comp(0, len);
std::string out_str;
StdStringCanonOutput output(&out_str);
- CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output,
+ CanonicalizeQuery(query_case.input8, in_comp, nullptr, &output,
&out_comp);
output.Complete();
- EXPECT_EQ(query_cases[i].expected, out_str);
+ EXPECT_EQ(query_case.expected, out_str);
}
- if (query_cases[i].input16) {
+ if (query_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(query_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(query_case.input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
std::string out_str;
StdStringCanonOutput output(&out_str);
- CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp);
+ CanonicalizeQuery(input16.c_str(), in_comp, nullptr, &output, &out_comp);
output.Complete();
- EXPECT_EQ(query_cases[i].expected, out_str);
+ EXPECT_EQ(query_case.expected, out_str);
}
}
@@ -1473,7 +1522,8 @@ TEST(URLCanonTest, Query) {
std::string out_str;
StdStringCanonOutput output(&out_str);
Component out_comp;
- CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);
+ CanonicalizeQuery("a \x00z\x01", Component(0, 5), nullptr, &output,
+ &out_comp);
output.Complete();
EXPECT_EQ("?a%20%00z%01", out_str);
}
@@ -1510,27 +1560,27 @@ TEST(URLCanonTest, Ref) {
{"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
};
- for (size_t i = 0; i < std::size(ref_cases); i++) {
+ for (const auto& ref_case : ref_cases) {
// 8-bit input
- if (ref_cases[i].input8) {
- int len = static_cast<int>(strlen(ref_cases[i].input8));
+ if (ref_case.input8) {
+ int len = static_cast<int>(strlen(ref_case.input8));
Component in_comp(0, len);
Component out_comp;
std::string out_str;
StdStringCanonOutput output(&out_str);
- CanonicalizeRef(ref_cases[i].input8, in_comp, &output, &out_comp);
+ CanonicalizeRef(ref_case.input8, in_comp, &output, &out_comp);
output.Complete();
- EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
- EXPECT_EQ(ref_cases[i].expected, out_str);
+ EXPECT_EQ(ref_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(ref_case.expected_component.len, out_comp.len);
+ EXPECT_EQ(ref_case.expected, out_str);
}
// 16-bit input
- if (ref_cases[i].input16) {
+ if (ref_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(ref_case.input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
Component out_comp;
@@ -1540,9 +1590,9 @@ TEST(URLCanonTest, Ref) {
CanonicalizeRef(input16.c_str(), in_comp, &output, &out_comp);
output.Complete();
- EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
- EXPECT_EQ(ref_cases[i].expected, out_str);
+ EXPECT_EQ(ref_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(ref_case.expected_component.len, out_comp.len);
+ EXPECT_EQ(ref_case.expected, out_str);
}
}
@@ -1565,24 +1615,14 @@ TEST(URLCanonTest, CanonicalizeStandardURL) {
// The individual component canonicalize tests should have caught the cases
// for each of those components. Here, we just need to test that the various
// parts are included or excluded properly, and have the correct separators.
+ // clang-format off
struct URLCase {
const char* input;
const char* expected;
bool expected_success;
} cases[] = {
- {"http://www.google.com/foo?bar=baz#",
- "http://www.google.com/foo?bar=baz#", true},
- {"http://[www.google.com]/", "http://[www.google.com]/", false},
- {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#",
- false},
- {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo",
- true},
- {"www.google.com", ":www.google.com/", false},
- {"http://192.0x00A80001", "http://192.168.0.1/", true},
- {"http://www/foo%2Ehtml", "http://www/foo.html", true},
- {"http://user:pass@/", "http://user:pass@/", false},
- {"http://%25DOMAIN:foobar@foodomain.com/",
- "http://%25DOMAIN:foobar@foodomain.com/", true},
+ {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#",
+ true},
// Backslashes should get converted to forward slashes.
{"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},
@@ -1619,27 +1659,28 @@ TEST(URLCanonTest, CanonicalizeStandardURL) {
// ICU will convert to an ASCII one, generating "%81".
{"ws:)W\x1eW\xef\xb9\xaa"
"81:80/",
- "ws://%29w%1ew%81/", false},
+ "ws://)w%1ew%81/", false},
// Regression test for the last_invalid_percent_index bug described in
// https://crbug.com/1080890#c10.
- {R"(HTTP:S/5%\../>%41)", "http://s/%3EA", true},
+ {R"(HTTP:S/5%\../>%41)", "http://s/%3E%41", true},
};
+ // clang-format on
- for (size_t i = 0; i < std::size(cases); i++) {
- int url_len = static_cast<int>(strlen(cases[i].input));
+ for (const auto& i : cases) {
+ int url_len = static_cast<int>(strlen(i.input));
Parsed parsed;
- ParseStandardURL(cases[i].input, url_len, &parsed);
+ ParseStandardURL(i.input, url_len, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
bool success = CanonicalizeStandardURL(
- cases[i].input, url_len, parsed,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, &output, &out_parsed);
+ i.input, url_len, parsed, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
+ nullptr, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(cases[i].expected_success, success);
- EXPECT_EQ(cases[i].expected, out_str);
+ EXPECT_EQ(i.expected_success, success);
+ EXPECT_EQ(i.expected, out_str);
}
}
@@ -1666,8 +1707,8 @@ TEST(URLCanonTest, ReplaceStandardURL) {
"filesystem://a:b@google.com:22/foo?baz@cat"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseStandardURL(cur.base, base_len, &parsed);
@@ -1689,12 +1730,12 @@ TEST(URLCanonTest, ReplaceStandardURL) {
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
- ReplaceStandardURL(replace_cases[i].base, parsed, r,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+ ReplaceStandardURL(replace_case.base, parsed, r,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
// The path pointer should be ignored if the address is invalid.
@@ -1713,7 +1754,7 @@ TEST(URLCanonTest, ReplaceStandardURL) {
StdStringCanonOutput output1(&out_str1);
Parsed new_parsed;
ReplaceStandardURL(src, parsed, r,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&output1, &new_parsed);
output1.Complete();
EXPECT_STREQ("http://www.google.com/", out_str1.c_str());
@@ -1723,7 +1764,7 @@ TEST(URLCanonTest, ReplaceStandardURL) {
std::string out_str2;
StdStringCanonOutput output2(&out_str2);
ReplaceStandardURL(src, parsed, r,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&output2, &new_parsed);
output2.Complete();
EXPECT_STREQ("http://www.google.com/", out_str2.c_str());
@@ -1766,8 +1807,8 @@ TEST(URLCanonTest, ReplaceFileURL) {
nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
SCOPED_TRACE(cur.base);
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
@@ -1787,10 +1828,10 @@ TEST(URLCanonTest, ReplaceFileURL) {
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
- ReplaceFileURL(cur.base, parsed, r, NULL, &output, &out_parsed);
+ ReplaceFileURL(cur.base, parsed, r, nullptr, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
@@ -1835,8 +1876,8 @@ TEST(URLCanonTest, ReplaceFileSystemURL) {
"filesystem:http://bar.com:40/t/gaba?query#ref"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseFileSystemURL(cur.base, base_len, &parsed);
@@ -1855,10 +1896,10 @@ TEST(URLCanonTest, ReplaceFileSystemURL) {
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
- ReplaceFileSystemURL(cur.base, parsed, r, NULL, &output, &out_parsed);
+ ReplaceFileSystemURL(cur.base, parsed, r, nullptr, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
@@ -1879,8 +1920,8 @@ TEST(URLCanonTest, ReplacePathURL) {
nullptr, nullptr, "data:"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParsePathURL(cur.base, base_len, false, &parsed);
@@ -1902,36 +1943,46 @@ TEST(URLCanonTest, ReplacePathURL) {
ReplacePathURL(cur.base, parsed, r, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
TEST(URLCanonTest, ReplaceMailtoURL) {
ReplaceCase replace_cases[] = {
// Replace everything
- {"mailto:jon@foo.com?body=sup", "mailto", NULL, NULL, NULL, NULL, "addr1", "to=tony", NULL, "mailto:addr1?to=tony"},
+ {"mailto:jon@foo.com?body=sup", "mailto", nullptr, nullptr, nullptr,
+ nullptr, "addr1", "to=tony", nullptr, "mailto:addr1?to=tony"},
// Replace nothing
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mailto:jon@foo.com?body=sup"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, "mailto:jon@foo.com?body=sup"},
// Replace the path
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", NULL, NULL, "mailto:jason?body=sup"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, "jason", nullptr, nullptr, "mailto:jason?body=sup"},
// Replace the query
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "custom=1", NULL, "mailto:jon@foo.com?custom=1"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "custom=1", nullptr, "mailto:jon@foo.com?custom=1"},
// Replace the path and query
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", "custom=1", NULL, "mailto:jason?custom=1"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, "jason", "custom=1", nullptr, "mailto:jason?custom=1"},
// Set the query to empty (should leave trailing question mark)
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "", NULL, "mailto:jon@foo.com?"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "", nullptr, "mailto:jon@foo.com?"},
// Clear the query
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "|", NULL, "mailto:jon@foo.com"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "|", nullptr, "mailto:jon@foo.com"},
// Clear the path
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "|", NULL, NULL, "mailto:?body=sup"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, "|", nullptr, nullptr, "mailto:?body=sup"},
// Clear the path + query
- {"mailto:", NULL, NULL, NULL, NULL, NULL, "|", "|", NULL, "mailto:"},
+ {"mailto:", nullptr, nullptr, nullptr, nullptr, nullptr, "|", "|",
+ nullptr, "mailto:"},
// Setting the ref should have no effect
- {"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"},
+ {"mailto:addr1", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, "BLAH", "mailto:addr1"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseMailtoURL(cur.base, base_len, &parsed);
@@ -1953,7 +2004,7 @@ TEST(URLCanonTest, ReplaceMailtoURL) {
ReplaceMailtoURL(cur.base, parsed, r, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
@@ -2043,31 +2094,31 @@ TEST(URLCanonTest, CanonicalizeFileURL) {
#endif // _WIN32
};
- for (size_t i = 0; i < std::size(cases); i++) {
- int url_len = static_cast<int>(strlen(cases[i].input));
+ for (const auto& i : cases) {
+ int url_len = static_cast<int>(strlen(i.input));
Parsed parsed;
- ParseFileURL(cases[i].input, url_len, &parsed);
+ ParseFileURL(i.input, url_len, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizeFileURL(cases[i].input, url_len, parsed, NULL,
+ bool success = CanonicalizeFileURL(i.input, url_len, parsed, nullptr,
&output, &out_parsed);
output.Complete();
- EXPECT_EQ(cases[i].expected_success, success);
- EXPECT_EQ(cases[i].expected, out_str);
+ EXPECT_EQ(i.expected_success, success);
+ EXPECT_EQ(i.expected, out_str);
// Make sure the spec was properly identified, the file canonicalizer has
// different code for writing the spec.
EXPECT_EQ(0, out_parsed.scheme.begin);
EXPECT_EQ(4, out_parsed.scheme.len);
- EXPECT_EQ(cases[i].expected_host.begin, out_parsed.host.begin);
- EXPECT_EQ(cases[i].expected_host.len, out_parsed.host.len);
+ EXPECT_EQ(i.expected_host.begin, out_parsed.host.begin);
+ EXPECT_EQ(i.expected_host.len, out_parsed.host.len);
- EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin);
- EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len);
+ EXPECT_EQ(i.expected_path.begin, out_parsed.path.begin);
+ EXPECT_EQ(i.expected_path.len, out_parsed.path.len);
}
}
@@ -2089,23 +2140,23 @@ TEST(URLCanonTest, CanonicalizeFileSystemURL) {
{"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true},
{"filesystem:File:///temporary/Bob?qUery#reF",
"filesystem:file:///temporary/Bob?qUery#reF", true},
- {"FilEsysteM:htTp:E=/.", "filesystem:http://e%3D//", false},
+ {"FilEsysteM:htTp:E=/.", "filesystem:http://e=//", false},
};
- for (size_t i = 0; i < std::size(cases); i++) {
- int url_len = static_cast<int>(strlen(cases[i].input));
+ for (const auto& i : cases) {
+ int url_len = static_cast<int>(strlen(i.input));
Parsed parsed;
- ParseFileSystemURL(cases[i].input, url_len, &parsed);
+ ParseFileSystemURL(i.input, url_len, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizeFileSystemURL(cases[i].input, url_len, parsed,
- NULL, &output, &out_parsed);
+ bool success = CanonicalizeFileSystemURL(i.input, url_len, parsed, nullptr,
+ &output, &out_parsed);
output.Complete();
- EXPECT_EQ(cases[i].expected_success, success);
- EXPECT_EQ(cases[i].expected, out_str);
+ EXPECT_EQ(i.expected_success, success);
+ EXPECT_EQ(i.expected, out_str);
// Make sure the spec was properly identified, the filesystem canonicalizer
// has different code for writing the spec.
@@ -2131,26 +2182,26 @@ TEST(URLCanonTest, CanonicalizePathURL) {
{"javascript:\uFFFF", "javascript:%EF%BF%BF"},
};
- for (size_t i = 0; i < std::size(path_cases); i++) {
- int url_len = static_cast<int>(strlen(path_cases[i].input));
+ for (const auto& path_case : path_cases) {
+ int url_len = static_cast<int>(strlen(path_case.input));
Parsed parsed;
- ParsePathURL(path_cases[i].input, url_len, true, &parsed);
+ ParsePathURL(path_case.input, url_len, true, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizePathURL(path_cases[i].input, url_len, parsed,
+ bool success = CanonicalizePathURL(path_case.input, url_len, parsed,
&output, &out_parsed);
output.Complete();
EXPECT_TRUE(success);
- EXPECT_EQ(path_cases[i].expected, out_str);
+ EXPECT_EQ(path_case.expected, out_str);
EXPECT_EQ(0, out_parsed.host.begin);
EXPECT_EQ(-1, out_parsed.host.len);
// When we end with a colon at the end, there should be no path.
- if (path_cases[i].input[url_len - 1] == ':') {
+ if (path_case.input[url_len - 1] == ':') {
EXPECT_EQ(0, out_parsed.GetContent().begin);
EXPECT_EQ(-1, out_parsed.GetContent().len);
}
@@ -2169,20 +2220,20 @@ TEST(URLCanonTest, CanonicalizePathURLPath) {
{"\uFFFF", L"\uFFFF", "%EF%BF%BF"},
};
- for (size_t i = 0; i < std::size(path_cases); i++) {
+ for (const auto& path_case : path_cases) {
// 8-bit string input
std::string out_str;
StdStringCanonOutput output(&out_str);
url::Component out_component;
- CanonicalizePathURLPath(path_cases[i].input.data(),
- Component(0, path_cases[i].input.size()), &output,
+ CanonicalizePathURLPath(path_case.input.data(),
+ Component(0, path_case.input.size()), &output,
&out_component);
output.Complete();
- EXPECT_EQ(path_cases[i].expected, out_str);
+ EXPECT_EQ(path_case.expected, out_str);
EXPECT_EQ(0, out_component.begin);
- EXPECT_EQ(path_cases[i].expected.size(),
+ EXPECT_EQ(path_case.expected.size(),
static_cast<size_t>(out_component.len));
// 16-bit string input
@@ -2190,16 +2241,16 @@ TEST(URLCanonTest, CanonicalizePathURLPath) {
StdStringCanonOutput output16(&out_str16);
url::Component out_component16;
std::u16string input16(
- test_utils::TruncateWStringToUTF16(path_cases[i].input16.data()));
+ test_utils::TruncateWStringToUTF16(path_case.input16.data()));
CanonicalizePathURLPath(input16.c_str(),
- Component(0, path_cases[i].input16.size()),
- &output16, &out_component16);
+ Component(0, path_case.input16.size()), &output16,
+ &out_component16);
output16.Complete();
- EXPECT_EQ(path_cases[i].expected, out_str16);
+ EXPECT_EQ(path_case.expected, out_str16);
EXPECT_EQ(0, out_component16.begin);
- EXPECT_EQ(path_cases[i].expected.size(),
+ EXPECT_EQ(path_case.expected.size(),
static_cast<size_t>(out_component16.len));
}
}
@@ -2394,148 +2445,209 @@ TEST(URLCanonTest, ResolveRelativeURL) {
const char* resolved; // What we expect in the result when resolving.
} rel_cases[] = {
// Basic absolute input.
- {"http://host/a", true, false, "http://another/", true, false, false, NULL},
- {"http://host/a", true, false, "http:////another/", true, false, false, NULL},
+ {"http://host/a", true, false, "http://another/", true, false, false,
+ nullptr},
+ {"http://host/a", true, false, "http:////another/", true, false, false,
+ nullptr},
// Empty relative URLs should only remove the ref part of the URL,
// leaving the rest unchanged.
- {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
- {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"},
- {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
+ {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
+ {"http://foo/bar#ref", true, false, "", true, true, true,
+ "http://foo/bar"},
+ {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
// Spaces at the ends of the relative path should be ignored.
- {"http://foo/bar", true, false, " another ", true, true, true, "http://foo/another"},
- {"http://foo/bar", true, false, " . ", true, true, true, "http://foo/"},
- {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"},
+ {"http://foo/bar", true, false, " another ", true, true, true,
+ "http://foo/another"},
+ {"http://foo/bar", true, false, " . ", true, true, true, "http://foo/"},
+ {"http://foo/bar", true, false, " \t ", true, true, true,
+ "http://foo/bar"},
// Matching schemes without two slashes are treated as relative.
- {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"},
- {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"},
- {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"},
- {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"},
+ {"http://host/a", true, false, "http:path", true, true, true,
+ "http://host/path"},
+ {"http://host/a/", true, false, "http:path", true, true, true,
+ "http://host/a/path"},
+ {"http://host/a", true, false, "http:/path", true, true, true,
+ "http://host/path"},
+ {"http://host/a", true, false, "HTTP:/path", true, true, true,
+ "http://host/path"},
// Nonmatching schemes are absolute.
- {"http://host/a", true, false, "https:host2", true, false, false, NULL},
- {"http://host/a", true, false, "htto:/host2", true, false, false, NULL},
+ {"http://host/a", true, false, "https:host2", true, false, false,
+ nullptr},
+ {"http://host/a", true, false, "htto:/host2", true, false, false,
+ nullptr},
// Absolute path input
- {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"},
- {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"},
- {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"},
- {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"},
- {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"},
- {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"},
+ {"http://host/a", true, false, "/b/c/d", true, true, true,
+ "http://host/b/c/d"},
+ {"http://host/a", true, false, "\\b\\c\\d", true, true, true,
+ "http://host/b/c/d"},
+ {"http://host/a", true, false, "/b/../c", true, true, true,
+ "http://host/c"},
+ {"http://host/a?b#c", true, false, "/b/../c", true, true, true,
+ "http://host/c"},
+ {"http://host/a", true, false, "\\b/../c?x#y", true, true, true,
+ "http://host/c?x#y"},
+ {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true,
+ "http://host/c?x#y"},
// Relative path input
- {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
- {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"},
- {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"},
- {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
- {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
- {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
- {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
- {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"},
- {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"},
+ {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
+ {"http://host/a", true, false, "bc/de", true, true, true,
+ "http://host/bc/de"},
+ {"http://host/a/", true, false, "bc/de?query#ref", true, true, true,
+ "http://host/a/bc/de?query#ref"},
+ {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
+ {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
+ {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
+ {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
+ {"http://host/a/", true, false, "././.", true, true, true,
+ "http://host/a/"},
+ {"http://host/a?query#ref", true, false, "../../../foo", true, true, true,
+ "http://host/foo"},
// Query input
- {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"},
- {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"},
- {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"},
+ {"http://host/a", true, false, "?foo=bar", true, true, true,
+ "http://host/a?foo=bar"},
+ {"http://host/a?x=y#z", true, false, "?", true, true, true,
+ "http://host/a?"},
+ {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true,
+ "http://host/a?foo=bar#com"},
// Ref input
- {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"},
- {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
- {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"},
+ {"http://host/a", true, false, "#ref", true, true, true,
+ "http://host/a#ref"},
+ {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
+ {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true,
+ "http://host/a?foo=bar#bye"},
// Non-hierarchical base: no relative handling. Relative input should
// error, and if a scheme is present, it should be treated as absolute.
- {"data:foobar", false, false, "baz.html", false, false, false, NULL},
- {"data:foobar", false, false, "data:baz", true, false, false, NULL},
- {"data:foobar", false, false, "data:/base", true, false, false, NULL},
+ {"data:foobar", false, false, "baz.html", false, false, false, nullptr},
+ {"data:foobar", false, false, "data:baz", true, false, false, nullptr},
+ {"data:foobar", false, false, "data:/base", true, false, false, nullptr},
// Non-hierarchical base: absolute input should succeed.
- {"data:foobar", false, false, "http://host/", true, false, false, NULL},
- {"data:foobar", false, false, "http:host", true, false, false, NULL},
+ {"data:foobar", false, false, "http://host/", true, false, false,
+ nullptr},
+ {"data:foobar", false, false, "http:host", true, false, false, nullptr},
// Non-hierarchical base: empty URL should give error.
- {"data:foobar", false, false, "", false, false, false, NULL},
+ {"data:foobar", false, false, "", false, false, false, nullptr},
// Invalid schemes should be treated as relative.
- {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"},
- {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"},
- {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"},
- {"data:asdf", false, false, ":foo", false, false, false, NULL},
- {"data:asdf", false, false, "bad(':foo')", false, false, false, NULL},
+ {"http://foo/bar", true, false, "./asd:fgh", true, true, true,
+ "http://foo/asd:fgh"},
+ {"http://foo/bar", true, false, ":foo", true, true, true,
+ "http://foo/:foo"},
+ {"http://foo/bar", true, false, " hello world", true, true, true,
+ "http://foo/hello%20world"},
+ {"data:asdf", false, false, ":foo", false, false, false, nullptr},
+ {"data:asdf", false, false, "bad(':foo')", false, false, false, nullptr},
// We should treat semicolons like any other character in URL resolving
- {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"},
- {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"},
- {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"},
+ {"http://host/a", true, false, ";foo", true, true, true,
+ "http://host/;foo"},
+ {"http://host/a;", true, false, ";foo", true, true, true,
+ "http://host/;foo"},
+ {"http://host/a", true, false, ";/../bar", true, true, true,
+ "http://host/bar"},
// Relative URLs can also be written as "//foo/bar" which is relative to
// the scheme. In this case, it would take the old scheme, so for http
// the example would resolve to "http://foo/bar".
- {"http://host/a", true, false, "//another", true, true, true, "http://another/"},
- {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"},
- {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"},
- {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"},
- {"http://host/a", true, false, "//", true, true, false, "http:"},
+ {"http://host/a", true, false, "//another", true, true, true,
+ "http://another/"},
+ {"http://host/a", true, false, "//another/path?query#ref", true, true,
+ true, "http://another/path?query#ref"},
+ {"http://host/a", true, false, "///another/path", true, true, true,
+ "http://another/path"},
+ {"http://host/a", true, false, "//Another\\path", true, true, true,
+ "http://another/path"},
+ {"http://host/a", true, false, "//", true, true, false, "http:"},
// IE will also allow one or the other to be a backslash to get the same
// behavior.
- {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"},
- {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"},
+ {"http://host/a", true, false, "\\/another/path", true, true, true,
+ "http://another/path"},
+ {"http://host/a", true, false, "/\\Another\\path", true, true, true,
+ "http://another/path"},
#ifdef WIN32
// Resolving against Windows file base URLs.
- {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL},
- {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
- {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"},
- {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"},
+ {"file:///C:/foo", true, true, "http://host/", true, false, false,
+ nullptr},
+ {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
+ {"file:///C:/foo", true, true, "../../../bar.html", true, true, true,
+ "file:///C:/bar.html"},
+ {"file:///C:/foo", true, true, "/../bar.html", true, true, true,
+ "file:///C:/bar.html"},
// But two backslashes on Windows should be UNC so should be treated
// as absolute.
- {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL},
+ {"http://host/a", true, false, "\\\\another\\path", true, false, false,
+ nullptr},
// IE doesn't support drive specs starting with two slashes. It fails
// immediately and doesn't even try to load. We fix it up to either
// an absolute path or UNC depending on what it looks like.
- {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"},
- {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"},
+ {"file:///C:/something", true, true, "//c:/foo", true, true, true,
+ "file:///C:/foo"},
+ {"file:///C:/something", true, true, "//localhost/c:/foo", true, true,
+ true, "file:///C:/foo"},
// Windows drive specs should be allowed and treated as absolute.
- {"file:///C:/foo", true, true, "c:", true, false, false, NULL},
- {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL},
- {"http://host/a", true, false, "c:\\foo", true, false, false, NULL},
+ {"file:///C:/foo", true, true, "c:", true, false, false, nullptr},
+ {"file:///C:/foo", true, true, "c:/foo", true, false, false, nullptr},
+ {"http://host/a", true, false, "c:\\foo", true, false, false, nullptr},
// Relative paths with drive letters should be allowed when the base is
// also a file.
- {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"},
+ {"file:///C:/foo", true, true, "/z:/bar", true, true, true,
+ "file:///Z:/bar"},
// Treat absolute paths as being off of the drive.
- {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"},
- {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"},
- {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"},
+ {"file:///C:/foo", true, true, "/bar", true, true, true,
+ "file:///C:/bar"},
+ {"file://localhost/C:/foo", true, true, "/bar", true, true, true,
+ "file://localhost/C:/bar"},
+ {"file:///C:/foo/com/", true, true, "/bar", true, true, true,
+ "file:///C:/bar"},
// On Windows, two slashes without a drive letter when the base is a file
// means that the path is UNC.
- {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"},
- {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"},
+ {"file:///C:/something", true, true, "//somehost/path", true, true, true,
+ "file://somehost/path"},
+ {"file:///C:/something", true, true, "/\\//somehost/path", true, true,
+ true, "file://somehost/path"},
#else
// On Unix we fall back to relative behavior since there's nothing else
// reasonable to do.
- {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"},
+ {"http://host/a", true, false, "\\\\Another\\path", true, true, true,
+ "http://another/path"},
#endif
// Even on Windows, we don't allow relative drive specs when the base
// is not file.
- {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
- {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+ {"http://host/a", true, false, "/c:\\foo", true, true, true,
+ "http://host/c:/foo"},
+ {"http://host/a", true, false, "//c:\\foo", true, true, true,
+ "http://c/foo"},
// Cross-platform relative file: resolution behavior.
- {"file://host/a", true, true, "/", true, true, true, "file://host/"},
- {"file://host/a", true, true, "//", true, true, true, "file:///"},
- {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
- {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
+ {"file://host/a", true, true, "/", true, true, true, "file://host/"},
+ {"file://host/a", true, true, "//", true, true, true, "file:///"},
+ {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
+ {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
// Ensure that ports aren't allowed for hosts relative to a file url.
// Although the result string shows a host:port portion, the call to
// resolve the relative URL returns false, indicating parse failure,
// which is what is required.
- {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
+ {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false,
+ "file://host:80/bar.txt"},
// Filesystem URL tests; filesystem URLs are only valid and relative if
// they have no scheme, e.g. "./index.html". There's no valid equivalent
// to http:index.html.
- {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
- {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
- {"filesystem:http://host/t/path", true, false, "http://host/t/path2", true, false, false, NULL},
- {"http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
- {"filesystem:http://host/t/path", true, false, "./path2", true, true, true, "filesystem:http://host/t/path2"},
- {"filesystem:http://host/t/path/", true, false, "path2", true, true, true, "filesystem:http://host/t/path/path2"},
- {"filesystem:http://host/t/path", true, false, "filesystem:http:path2", true, false, false, NULL},
+ {"filesystem:http://host/t/path", true, false,
+ "filesystem:http://host/t/path2", true, false, false, nullptr},
+ {"filesystem:http://host/t/path", true, false,
+ "filesystem:https://host/t/path2", true, false, false, nullptr},
+ {"filesystem:http://host/t/path", true, false, "http://host/t/path2",
+ true, false, false, nullptr},
+ {"http://host/t/path", true, false, "filesystem:http://host/t/path2",
+ true, false, false, nullptr},
+ {"filesystem:http://host/t/path", true, false, "./path2", true, true,
+ true, "filesystem:http://host/t/path2"},
+ {"filesystem:http://host/t/path/", true, false, "path2", true, true, true,
+ "filesystem:http://host/t/path/path2"},
+ {"filesystem:http://host/t/path", true, false, "filesystem:http:path2",
+ true, false, false, nullptr},
// Absolute URLs are still not relative to a non-standard base URL.
- {"about:blank", false, false, "http://X/A", true, false, true, ""},
- {"about:blank", false, false, "content://content.Provider/", true, false, true, ""},
+ {"about:blank", false, false, "http://X/A", true, false, true, ""},
+ {"about:blank", false, false, "content://content.Provider/", true, false,
+ true, ""},
};
- for (size_t i = 0; i < std::size(rel_cases); i++) {
- const RelativeCase& cur_case = rel_cases[i];
-
+ for (const auto& cur_case : rel_cases) {
Parsed parsed;
int base_len = static_cast<int>(strlen(cur_case.base));
if (cur_case.is_base_file)
@@ -2565,7 +2677,7 @@ TEST(URLCanonTest, ResolveRelativeURL) {
bool succeed_resolve = ResolveRelativeURL(
cur_case.base, parsed, cur_case.is_base_file, cur_case.test,
- relative_component, NULL, &output, &resolved_parsed);
+ relative_component, nullptr, &output, &resolved_parsed);
output.Complete();
EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve);
@@ -2615,7 +2727,7 @@ TEST(URLCanonTest, ReplacementOverflow) {
Parsed repl_parsed;
std::string repl_str;
StdStringCanonOutput repl_output(&repl_str);
- ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed);
+ ReplaceFileURL(src, parsed, repl, nullptr, &repl_output, &repl_parsed);
repl_output.Complete();
// Generate the expected string and check.
@@ -2643,7 +2755,7 @@ TEST(URLCanonTest, DefaultPortForScheme) {
{"WSS", PORT_UNSPECIFIED},
};
- for (auto& test_case : cases) {
+ for (const auto& test_case : cases) {
SCOPED_TRACE(test_case.scheme);
EXPECT_EQ(test_case.expected_port,
DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme)));
@@ -2652,7 +2764,7 @@ TEST(URLCanonTest, DefaultPortForScheme) {
TEST(URLCanonTest, FindWindowsDriveLetter) {
struct TestCase {
- base::StringPiece spec;
+ std::string_view spec;
int begin;
int end; // -1 for end of spec
int expected_drive_letter_pos;
@@ -2696,33 +2808,33 @@ TEST(URLCanonTest, IDNToASCII) {
// Basic ASCII test.
std::u16string str = u"hello";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"hello", std::u16string(output.data()));
output.set_length(0);
// Mixed ASCII/non-ASCII.
str = u"hellö";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// All non-ASCII.
str = u"你好";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data()));
output.set_length(0);
// Characters that need mapping (the resulting Punycode is the encoding for
// "1⁄4").
str = u"¼";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and all ASCII. Should not
// modify the string.
str = u"xn--hell-8qa";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
@@ -2730,7 +2842,7 @@ TEST(URLCanonTest, IDNToASCII) {
// Should fail, due to a special case: if the label starts with "xn--", it
// should be parsed as Punycode, which must be all ASCII.
str = u"xn--hellö";
- EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_FALSE(IDNToASCII(str, &output));
output.set_length(0);
// String to encode already starts with "xn--", and mixed ASCII/non-ASCII.
@@ -2738,31 +2850,8 @@ TEST(URLCanonTest, IDNToASCII) {
// which would be a valid ASCII character, U+0044, if the high byte were
// ignored.
str = u"xn--1⁄4";
- EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_FALSE(IDNToASCII(str, &output));
output.set_length(0);
}
-TEST(URLCanonTest, UnescapePathCharHistogram) {
- struct TestCase {
- base::StringPiece path;
- base::HistogramBase::Count cnt;
- } cases[] = {
- {"/a", 0},
- {"/%61", 1},
- {"/%61%61", 1},
- };
-
- for (const auto& c : cases) {
- base::HistogramTester histogram_tester;
- Component in_comp(0, c.path.size());
- Component out_comp;
- std::string out_str;
- StdStringCanonOutput output(&out_str);
- bool success = CanonicalizePath(c.path.data(), in_comp, &output, &out_comp);
- ASSERT_TRUE(success);
- histogram_tester.ExpectBucketCount("URL.Path.UnescapeEscapedChar", 1,
- c.cnt);
- }
-}
-
} // namespace url
diff --git a/url/url_features.cc b/url/url_features.cc
index 858e82790..5d6bc165d 100644
--- a/url/url_features.cc
+++ b/url/url_features.cc
@@ -16,15 +16,16 @@ BASE_FEATURE(kRecordIDNA2008Metrics,
"RecordIDNA2008Metrics",
base::FEATURE_ENABLED_BY_DEFAULT);
-BASE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing,
- "StrictIPv4EmbeddedIPv6AddressParsing",
- base::FEATURE_ENABLED_BY_DEFAULT);
-
// Kill switch for crbug.com/1220361.
BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
"ResolveBareFragmentWithColonOnNonHierarchical",
base::FEATURE_ENABLED_BY_DEFAULT);
+// Kill switch for https://crbug.com/1416013.
+BASE_FEATURE(kStandardCompliantHostCharacters,
+ "StandardCompliantHostCharacters",
+ base::FEATURE_ENABLED_BY_DEFAULT);
+
bool IsUsingIDNA2008NonTransitional() {
// If the FeatureList isn't available yet, fall back to the feature's default
// state. This may happen during early startup, see crbug.com/1441956.
@@ -36,6 +37,17 @@ bool IsUsingIDNA2008NonTransitional() {
return base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
}
+bool IsUsingStandardCompliantHostCharacters() {
+ // If the FeatureList isn't available yet, fall back to the feature's default
+ // state. This may happen during early startup, see crbug.com/1441956.
+ if (!base::FeatureList::GetInstance()) {
+ return kStandardCompliantHostCharacters.default_state ==
+ base::FEATURE_ENABLED_BY_DEFAULT;
+ }
+
+ return base::FeatureList::IsEnabled(kStandardCompliantHostCharacters);
+}
+
bool IsRecordingIDNA2008Metrics() {
return base::FeatureList::IsEnabled(kRecordIDNA2008Metrics);
}
diff --git a/url/url_features.h b/url/url_features.h
index e95752141..68bcc5ae3 100644
--- a/url/url_features.h
+++ b/url/url_features.h
@@ -18,16 +18,21 @@ COMPONENT_EXPORT(URL) bool IsUsingIDNA2008NonTransitional();
// Returns true if Chrome is recording IDNA 2008 related metrics.
COMPONENT_EXPORT(URL) bool IsRecordingIDNA2008Metrics();
-// Returns true if Chrome is enforcing the 4 part check for IPv4 embedded IPv6
-// addresses.
-COMPONENT_EXPORT(URL)
-BASE_DECLARE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing);
+// Returns true if IsUsingStandardCompliantHostCharacters feature is enabled.
+// See url::kStandardCompliantHostCharacters for details.
+COMPONENT_EXPORT(URL) bool IsUsingStandardCompliantHostCharacters();
// When enabled, allows resolving of a bare fragment containing a colon against
// a non-hierarchical URL. (For example '#foo:bar' against 'about:blank'.)
COMPONENT_EXPORT(URL)
BASE_DECLARE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical);
+// When enabled, Chrome uses URL Standard compliant mode to
+// handle punctuation characters in URL host part.
+// https://crbug.com/1416013 for details.
+COMPONENT_EXPORT(URL)
+BASE_DECLARE_FEATURE(kStandardCompliantHostCharacters);
+
} // namespace url
#endif // URL_URL_FEATURES_H_
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index 0a552a880..912cb5873 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc
@@ -11,6 +11,7 @@
#include <ostream>
#include "base/check_op.h"
+#include "base/numerics/safe_conversions.h"
#include "third_party/icu/source/common/unicode/uidna.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "url/url_canon_icu.h"
@@ -90,7 +91,7 @@ UIDNA* GetUIDNA() {
// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
// version with StringByteSink. That way, we can avoid C wrappers and additional
// string conversion.
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output) {
DCHECK(output->length() == 0); // Output buffer is assumed empty.
UIDNA* uidna = GetUIDNA();
@@ -98,8 +99,9 @@ bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
while (true) {
UErrorCode err = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
- int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
- output->capacity(), &info, &err);
+ int output_length = uidna_nameToASCII(
+ uidna, src.data(), base::checked_cast<int32_t>(src.size()),
+ output->data(), output->capacity(), &info, &err);
// Ignore various errors for web compatibility. The options are specified
// by the WHATWG URL Standard. See
diff --git a/url/url_idna_icu_alternatives_android.cc b/url/url_idna_icu_alternatives_android.cc
index 9faf5710f..878e88558 100644
--- a/url/url_idna_icu_alternatives_android.cc
+++ b/url/url_idna_icu_alternatives_android.cc
@@ -5,10 +5,10 @@
#include <string.h>
#include <string>
+#include <string_view>
#include "base/android/jni_android.h"
#include "base/android/jni_string.h"
-#include "base/strings/string_piece.h"
#include "url/url_canon_internal.h"
#include "url/url_jni_headers/IDNStringUtil_jni.h"
@@ -18,13 +18,12 @@ namespace url {
// This uses the JDK's conversion function, which uses IDNA 2003, unlike the
// ICU implementation.
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output) {
DCHECK_EQ(0u, output->length()); // Output buffer is assumed empty.
JNIEnv* env = base::android::AttachCurrentThread();
base::android::ScopedJavaLocalRef<jstring> java_src =
- base::android::ConvertUTF16ToJavaString(
- env, base::StringPiece16(src, src_len));
+ base::android::ConvertUTF16ToJavaString(env, src);
ScopedJavaLocalRef<jstring> java_result =
android::Java_IDNStringUtil_idnToASCII(env, java_src);
// NULL indicates failure.
diff --git a/url/url_idna_icu_alternatives_ios.mm b/url/url_idna_icu_alternatives_ios.mm
index d604b3516..c36a61807 100644
--- a/url/url_idna_icu_alternatives_ios.mm
+++ b/url/url_idna_icu_alternatives_ios.mm
@@ -6,8 +6,8 @@
#include <ostream>
#include <string>
+#include <string_view>
-#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "url/url_canon_internal.h"
@@ -16,9 +16,9 @@ namespace url {
// Only allow ASCII to avoid ICU dependency. Use NSString+IDN
// to convert non-ASCII URL prior to passing to API.
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
- if (base::IsStringASCII(base::StringPiece16(src, src_len))) {
- output->Append(src, src_len);
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output) {
+ if (base::IsStringASCII(src)) {
+ output->Append(src);
return true;
}
DCHECK(false) << "IDN URL support is not available.";
diff --git a/url/url_parse_perftest.cc b/url/url_parse_perftest.cc
index 7fe1d39b1..f06e019e8 100644
--- a/url/url_parse_perftest.cc
+++ b/url/url_parse_perftest.cc
@@ -2,7 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "base/test/perf_time_logger.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
@@ -13,7 +14,7 @@
namespace {
TEST(URLParse, FullURL) {
- constexpr base::StringPiece kUrl =
+ constexpr std::string_view kUrl =
"http://me:pass@host/foo/bar.html;param?query=yes#ref";
url::Parsed parsed;
@@ -24,16 +25,16 @@ TEST(URLParse, FullURL) {
timer.Done();
}
-constexpr base::StringPiece kTypicalUrl1 =
+constexpr std::string_view kTypicalUrl1 =
"http://www.google.com/"
"search?q=url+parsing&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:en-US:"
"official&client=firefox-a";
-constexpr base::StringPiece kTypicalUrl2 =
+constexpr std::string_view kTypicalUrl2 =
"http://www.amazon.com/Stephen-King-Thrillers-Horror-People/dp/0766012336/"
"ref=sr_1_2/133-4144931-4505264?ie=UTF8&s=books&qid=2144880915&sr=8-2";
-constexpr base::StringPiece kTypicalUrl3 =
+constexpr std::string_view kTypicalUrl3 =
"http://store.apple.com/1-800-MY-APPLE/WebObjects/AppleStore.woa/wa/"
"RSLID?nnmm=browse&mco=578E9744&node=home/desktop/mac_pro";
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc
index 88b6f05ef..d67becf33 100644
--- a/url/url_parse_unittest.cc
+++ b/url/url_parse_unittest.cc
@@ -134,11 +134,11 @@ TEST(URLParser, Length) {
"http://user@",
"http:",
};
- for (size_t i = 0; i < std::size(length_cases); i++) {
- int true_length = static_cast<int>(strlen(length_cases[i]));
+ for (const char* length_case : length_cases) {
+ int true_length = static_cast<int>(strlen(length_case));
Parsed parsed;
- ParseStandardURL(length_cases[i], true_length, &parsed);
+ ParseStandardURL(length_case, true_length, &parsed);
EXPECT_EQ(true_length, parsed.Length());
}
@@ -193,154 +193,159 @@ TEST(URLParser, CountCharactersBefore) {
{"file:///c:/foo", Parsed::HOST, true, 7},
{"file:///c:/foo", Parsed::PATH, true, 7},
};
- for (size_t i = 0; i < std::size(count_cases); i++) {
- int length = static_cast<int>(strlen(count_cases[i].url));
+ for (const auto& count_case : count_cases) {
+ int length = static_cast<int>(strlen(count_case.url));
// Simple test to distinguish file and standard URLs.
Parsed parsed;
- if (length > 0 && count_cases[i].url[0] == 'f')
- ParseFileURL(count_cases[i].url, length, &parsed);
- else
- ParseStandardURL(count_cases[i].url, length, &parsed);
+ if (length > 0 && count_case.url[0] == 'f') {
+ ParseFileURL(count_case.url, length, &parsed);
+ } else {
+ ParseStandardURL(count_case.url, length, &parsed);
+ }
int chars_before = parsed.CountCharactersBefore(
- count_cases[i].component, count_cases[i].include_delimiter);
- EXPECT_EQ(count_cases[i].expected_count, chars_before);
+ count_case.component, count_case.include_delimiter);
+ EXPECT_EQ(count_case.expected_count, chars_before);
}
}
// Standard --------------------------------------------------------------------
-// Input Scheme Usrname Passwd Host Port Path Query Ref
-// ------------------------------------ ------- ------- ---------- ------------ --- ---------- ------------ -----
+// clang-format off
+// Input Scheme Usrname Passwd Host Port Path Query Ref
+// ------------------------------------ ------- -------- ---------- ------------ --- ---------- ------------ -----
static URLParseCase cases[] = {
// Regular URL with all the parts
-{"http://user:pass@foo:21/bar;par?b#c", "http", "user", "pass", "foo", 21, "/bar;par","b", "c"},
+{"http://user:pass@foo:21/bar;par?b#c", "http", "user", "pass", "foo", 21, "/bar;par","b", "c"},
// Known schemes should lean towards authority identification
-{"http:foo.com", "http", NULL, NULL, "foo.com", -1, NULL, NULL, NULL},
+{"http:foo.com", "http", nullptr, nullptr, "foo.com", -1, nullptr, nullptr, nullptr},
// Spaces!
-{"\t :foo.com \n", "", NULL, NULL, "foo.com", -1, NULL, NULL, NULL},
-{" foo.com ", NULL, NULL, NULL, "foo.com", -1, NULL, NULL, NULL},
-{"a:\t foo.com", "a", NULL, NULL, "\t foo.com", -1, NULL, NULL, NULL},
-{"http://f:21/ b ? d # e ", "http", NULL, NULL, "f", 21, "/ b ", " d ", " e"},
+{"\t :foo.com \n", "", nullptr, nullptr, "foo.com", -1, nullptr, nullptr, nullptr},
+{" foo.com ", nullptr,nullptr, nullptr, "foo.com", -1, nullptr, nullptr, nullptr},
+{"a:\t foo.com", "a", nullptr, nullptr, "\t foo.com", -1, nullptr, nullptr, nullptr},
+{"http://f:21/ b ? d # e ", "http", nullptr, nullptr, "f", 21, "/ b ", " d ", " e"},
// Invalid port numbers should be identified and turned into -2, empty port
// numbers should be -1. Spaces aren't allowed in port numbers
-{"http://f:/c", "http", NULL, NULL, "f", -1, "/c", NULL, NULL},
-{"http://f:0/c", "http", NULL, NULL, "f", 0, "/c", NULL, NULL},
-{"http://f:00000000000000/c", "http", NULL, NULL, "f", 0, "/c", NULL, NULL},
-{"http://f:00000000000000000000080/c", "http", NULL, NULL, "f", 80, "/c", NULL, NULL},
-{"http://f:b/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL},
-{"http://f: /c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL},
-{"http://f:\n/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL},
-{"http://f:fifty-two/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL},
-{"http://f:999999/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL},
-{"http://f: 21 / b ? d # e ", "http", NULL, NULL, "f", -2, "/ b ", " d ", " e"},
+{"http://f:/c", "http", nullptr, nullptr, "f", -1, "/c", nullptr, nullptr},
+{"http://f:0/c", "http", nullptr, nullptr, "f", 0, "/c", nullptr, nullptr},
+{"http://f:00000000000000/c", "http", nullptr, nullptr, "f", 0, "/c", nullptr, nullptr},
+{"http://f:00000000000000000000080/c", "http", nullptr, nullptr, "f", 80, "/c", nullptr, nullptr},
+{"http://f:b/c", "http", nullptr, nullptr, "f", -2, "/c", nullptr, nullptr},
+{"http://f: /c", "http", nullptr, nullptr, "f", -2, "/c", nullptr, nullptr},
+{"http://f:\n/c", "http", nullptr, nullptr, "f", -2, "/c", nullptr, nullptr},
+{"http://f:fifty-two/c", "http", nullptr, nullptr, "f", -2, "/c", nullptr, nullptr},
+{"http://f:999999/c", "http", nullptr, nullptr, "f", -2, "/c", nullptr, nullptr},
+{"http://f: 21 / b ? d # e ", "http", nullptr, nullptr, "f", -2, "/ b ", " d ", " e"},
// Creative URLs missing key elements
-{"", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{" \t", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{":foo.com/", "", NULL, NULL, "foo.com", -1, "/", NULL, NULL},
-{":foo.com\\", "", NULL, NULL, "foo.com", -1, "\\", NULL, NULL},
-{":", "", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{":a", "", NULL, NULL, "a", -1, NULL, NULL, NULL},
-{":/", "", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{":\\", "", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{":#", "", NULL, NULL, NULL, -1, NULL, NULL, ""},
-{"#", NULL, NULL, NULL, NULL, -1, NULL, NULL, ""},
-{"#/", NULL, NULL, NULL, NULL, -1, NULL, NULL, "/"},
-{"#\\", NULL, NULL, NULL, NULL, -1, NULL, NULL, "\\"},
-{"#;?", NULL, NULL, NULL, NULL, -1, NULL, NULL, ";?"},
-{"?", NULL, NULL, NULL, NULL, -1, NULL, "", NULL},
-{"/", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{":23", "", NULL, NULL, "23", -1, NULL, NULL, NULL},
-{"/:23", "/", NULL, NULL, "23", -1, NULL, NULL, NULL},
-{"//", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{"::", "", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{"::23", "", NULL, NULL, NULL, 23, NULL, NULL, NULL},
-{"foo://", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL},
+{"", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{" \t", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{":foo.com/", "", nullptr, nullptr, "foo.com", -1, "/", nullptr, nullptr},
+{":foo.com\\", "", nullptr, nullptr, "foo.com", -1, "\\", nullptr, nullptr},
+{":", "", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{":a", "", nullptr, nullptr, "a", -1, nullptr, nullptr, nullptr},
+{":/", "", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{":\\", "", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{":#", "", nullptr, nullptr, nullptr, -1, nullptr, nullptr, ""},
+{"#", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, ""},
+{"#/", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, "/"},
+{"#\\", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, "\\"},
+{"#;?", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, ";?"},
+{"?", nullptr,nullptr, nullptr, nullptr, -1, nullptr, "", nullptr},
+{"/", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{":23", "", nullptr, nullptr, "23", -1, nullptr, nullptr, nullptr},
+{"/:23", "/", nullptr, nullptr, "23", -1, nullptr, nullptr, nullptr},
+{"//", nullptr,nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{"::", "", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{"::23", "", nullptr, nullptr, nullptr, 23, nullptr, nullptr, nullptr},
+{"foo://", "foo", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
// Username/passwords and things that look like them
-{"http://a:b@c:29/d", "http", "a", "b", "c", 29, "/d", NULL, NULL},
-{"http::@c:29", "http", "", "", "c", 29, NULL, NULL, NULL},
+{"http://a:b@c:29/d", "http", "a", "b", "c", 29, "/d", nullptr, nullptr},
+{"http::@c:29", "http", "", "", "c", 29, nullptr, nullptr, nullptr},
// ... "]" in the password field isn't allowed, but we tolerate it here...
-{"http://&a:foo(b]c@d:2/", "http", "&a", "foo(b]c", "d", 2, "/", NULL, NULL},
-{"http://::@c@d:2", "http", "", ":@c", "d", 2, NULL, NULL, NULL},
-{"http://foo.com:b@d/", "http", "foo.com", "b", "d", -1, "/", NULL, NULL},
+{"http://&a:foo(b]c@d:2/", "http", "&a", "foo(b]c", "d", 2, "/", nullptr, nullptr},
+{"http://::@c@d:2", "http", "", ":@c", "d", 2, nullptr, nullptr, nullptr},
+{"http://foo.com:b@d/", "http", "foo.com","b", "d", -1, "/", nullptr, nullptr},
-{"http://foo.com/\\@", "http", NULL, NULL, "foo.com", -1, "/\\@", NULL, NULL},
-{"http:\\\\foo.com\\", "http", NULL, NULL, "foo.com", -1, "\\", NULL, NULL},
-{"http:\\\\a\\b:c\\d@foo.com\\", "http", NULL, NULL, "a", -1, "\\b:c\\d@foo.com\\", NULL, NULL},
+{"http://foo.com/\\@", "http", nullptr, nullptr, "foo.com", -1, "/\\@", nullptr, nullptr},
+{"http:\\\\foo.com\\", "http", nullptr, nullptr, "foo.com", -1, "\\", nullptr, nullptr},
+{"http:\\\\a\\b:c\\d@foo.com\\", "http", nullptr, nullptr, "a", -1, "\\b:c\\d@foo.com\\", nullptr,nullptr},
// Tolerate different numbers of slashes.
-{"foo:/", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{"foo:/bar.com/", "foo", NULL, NULL, "bar.com", -1, "/", NULL, NULL},
-{"foo://///////", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{"foo://///////bar.com/", "foo", NULL, NULL, "bar.com", -1, "/", NULL, NULL},
-{"foo:////://///", "foo", NULL, NULL, NULL, -1, "/////", NULL, NULL},
+{"foo:/", "foo", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{"foo:/bar.com/", "foo", nullptr, nullptr, "bar.com", -1, "/", nullptr, nullptr},
+{"foo://///////", "foo", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{"foo://///////bar.com/", "foo", nullptr, nullptr, "bar.com", -1, "/", nullptr, nullptr},
+{"foo:////://///", "foo", nullptr, nullptr, nullptr, -1, "/////", nullptr, nullptr},
// Raw file paths on Windows aren't handled by the parser.
-{"c:/foo", "c", NULL, NULL, "foo", -1, NULL, NULL, NULL},
-{"//foo/bar", NULL, NULL, NULL, "foo", -1, "/bar", NULL, NULL},
+{"c:/foo", "c", nullptr, nullptr, "foo", -1, nullptr, nullptr, nullptr},
+{"//foo/bar", nullptr,nullptr, nullptr, "foo", -1, "/bar", nullptr, nullptr},
// Use the first question mark for the query and the ref.
-{"http://foo/path;a??e#f#g", "http", NULL, NULL, "foo", -1, "/path;a", "?e", "f#g"},
-{"http://foo/abcd?efgh?ijkl", "http", NULL, NULL, "foo", -1, "/abcd", "efgh?ijkl", NULL},
-{"http://foo/abcd#foo?bar", "http", NULL, NULL, "foo", -1, "/abcd", NULL, "foo?bar"},
+{"http://foo/path;a??e#f#g", "http", nullptr, nullptr, "foo", -1, "/path;a", "?e", "f#g"},
+{"http://foo/abcd?efgh?ijkl", "http", nullptr, nullptr, "foo", -1, "/abcd", "efgh?ijkl", nullptr},
+{"http://foo/abcd#foo?bar", "http", nullptr, nullptr, "foo", -1, "/abcd", nullptr, "foo?bar"},
// IPv6, check also interesting uses of colons.
-{"[61:24:74]:98", "[61", NULL, NULL, "24:74]", 98, NULL, NULL, NULL},
-{"http://[61:27]:98", "http", NULL, NULL, "[61:27]", 98, NULL, NULL, NULL},
-{"http:[61:27]/:foo", "http", NULL, NULL, "[61:27]", -1, "/:foo", NULL, NULL},
-{"http://[1::2]:3:4", "http", NULL, NULL, "[1::2]:3", 4, NULL, NULL, NULL},
+{"[61:24:74]:98", "[61", nullptr, nullptr, "24:74]", 98, nullptr, nullptr, nullptr},
+{"http://[61:27]:98", "http", nullptr, nullptr, "[61:27]", 98, nullptr, nullptr, nullptr},
+{"http:[61:27]/:foo", "http", nullptr, nullptr, "[61:27]", -1, "/:foo", nullptr, nullptr},
+{"http://[1::2]:3:4", "http", nullptr, nullptr, "[1::2]:3", 4, nullptr, nullptr, nullptr},
// Partially-complete IPv6 literals, and related cases.
-{"http://2001::1", "http", NULL, NULL, "2001:", 1, NULL, NULL, NULL},
-{"http://[2001::1", "http", NULL, NULL, "[2001::1", -1, NULL, NULL, NULL},
-{"http://2001::1]", "http", NULL, NULL, "2001::1]", -1, NULL, NULL, NULL},
-{"http://2001::1]:80", "http", NULL, NULL, "2001::1]", 80, NULL, NULL, NULL},
-{"http://[2001::1]", "http", NULL, NULL, "[2001::1]", -1, NULL, NULL, NULL},
-{"http://[2001::1]:80", "http", NULL, NULL, "[2001::1]", 80, NULL, NULL, NULL},
-{"http://[[::]]", "http", NULL, NULL, "[[::]]", -1, NULL, NULL, NULL},
+{"http://2001::1", "http", nullptr, nullptr, "2001:", 1, nullptr, nullptr, nullptr},
+{"http://[2001::1", "http", nullptr, nullptr, "[2001::1", -1, nullptr, nullptr, nullptr},
+{"http://2001::1]", "http", nullptr, nullptr, "2001::1]", -1, nullptr, nullptr, nullptr},
+{"http://2001::1]:80", "http", nullptr, nullptr, "2001::1]", 80, nullptr, nullptr, nullptr},
+{"http://[2001::1]", "http", nullptr, nullptr, "[2001::1]", -1, nullptr, nullptr, nullptr},
+{"http://[2001::1]:80", "http", nullptr, nullptr, "[2001::1]", 80, nullptr, nullptr, nullptr},
+{"http://[[::]]", "http", nullptr, nullptr, "[[::]]", -1, nullptr, nullptr, nullptr},
};
+// clang-format on
TEST(URLParser, Standard) {
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < std::size(cases); i++) {
- const char* url = cases[i].input;
+ for (const auto& i : cases) {
+ const char* url = i.input;
ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
int port = ParsePort(url, parsed.port);
- EXPECT_TRUE(ComponentMatches(url, cases[i].scheme, parsed.scheme));
- EXPECT_TRUE(ComponentMatches(url, cases[i].username, parsed.username));
- EXPECT_TRUE(ComponentMatches(url, cases[i].password, parsed.password));
- EXPECT_TRUE(ComponentMatches(url, cases[i].host, parsed.host));
- EXPECT_EQ(cases[i].port, port);
- EXPECT_TRUE(ComponentMatches(url, cases[i].path, parsed.path));
- EXPECT_TRUE(ComponentMatches(url, cases[i].query, parsed.query));
- EXPECT_TRUE(ComponentMatches(url, cases[i].ref, parsed.ref));
+ EXPECT_TRUE(ComponentMatches(url, i.scheme, parsed.scheme));
+ EXPECT_TRUE(ComponentMatches(url, i.username, parsed.username));
+ EXPECT_TRUE(ComponentMatches(url, i.password, parsed.password));
+ EXPECT_TRUE(ComponentMatches(url, i.host, parsed.host));
+ EXPECT_EQ(i.port, port);
+ EXPECT_TRUE(ComponentMatches(url, i.path, parsed.path));
+ EXPECT_TRUE(ComponentMatches(url, i.query, parsed.query));
+ EXPECT_TRUE(ComponentMatches(url, i.ref, parsed.ref));
}
}
// PathURL --------------------------------------------------------------------
// Various incarnations of path URLs.
+// clang-format off
static PathURLParseCase path_cases[] = {
-{"", NULL, NULL},
-{":", "", NULL},
+{"", nullptr, nullptr},
+{":", "", nullptr},
{":/", "", "/"},
-{"/", NULL, "/"},
-{" This is \\interesting// \t", NULL, "This is \\interesting// \t"},
-{"about:", "about", NULL},
+{"/", nullptr, "/"},
+{" This is \\interesting// \t", nullptr, "This is \\interesting// \t"},
+{"about:", "about", nullptr},
{"about:blank", "about", "blank"},
{" about: blank ", "about", " blank "},
{"javascript :alert(\"He:/l\\l#o?foo\"); ", "javascript ", "alert(\"He:/l\\l#o?foo\"); "},
};
+// clang-format on
TEST(URLParser, PathURL) {
// Declared outside for loop to try to catch cases in init() where we forget
@@ -364,82 +369,84 @@ TEST(URLParser, PathURL) {
}
// Various incarnations of file URLs.
+// clang-format off
static URLParseCase file_cases[] = {
#ifdef WIN32
-{"file:server", "file", NULL, NULL, "server", -1, NULL, NULL, NULL},
-{" file: server \t", "file", NULL, NULL, " server",-1, NULL, NULL, NULL},
-{"FiLe:c|", "FiLe", NULL, NULL, NULL, -1, "c|", NULL, NULL},
-{"FILE:/\\\\/server/file", "FILE", NULL, NULL, "server", -1, "/file", NULL, NULL},
-{"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL},
-{"file://localhost/c:/", "file", NULL, NULL, "localhost", -1, "/c:/", NULL, NULL},
-{"file://127.0.0.1/c|\\", "file", NULL, NULL, "127.0.0.1", -1, "/c|\\", NULL, NULL},
-{"file:/", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
-{"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
+{"file:server", "file", nullptr, nullptr, "server", -1, nullptr, nullptr, nullptr},
+{" file: server \t", "file", nullptr, nullptr, " server",-1, nullptr, nullptr, nullptr},
+{"FiLe:c|", "FiLe", nullptr, nullptr, nullptr, -1, "c|", nullptr, nullptr},
+{"FILE:/\\\\/server/file", "FILE", nullptr, nullptr, "server", -1, "/file", nullptr, nullptr},
+{"file://server/", "file", nullptr, nullptr, "server", -1, "/", nullptr, nullptr},
+{"file://localhost/c:/", "file", nullptr, nullptr, "localhost", -1, "/c:/", nullptr, nullptr},
+{"file://127.0.0.1/c|\\", "file", nullptr, nullptr, "127.0.0.1", -1, "/c|\\", nullptr, nullptr},
+{"file:/", "file", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+{"file:", "file", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
// If there is a Windows drive letter, treat any number of slashes as the
// path part.
-{"file:c:\\fo\\b", "file", NULL, NULL, NULL, -1, "c:\\fo\\b", NULL, NULL},
-{"file:/c:\\foo/bar", "file", NULL, NULL, NULL, -1, "/c:\\foo/bar",NULL, NULL},
-{"file://c:/f\\b", "file", NULL, NULL, NULL, -1, "/c:/f\\b", NULL, NULL},
-{"file:///C:/foo", "file", NULL, NULL, NULL, -1, "/C:/foo", NULL, NULL},
-{"file://///\\/\\/c:\\f\\b", "file", NULL, NULL, NULL, -1, "/c:\\f\\b", NULL, NULL},
+{"file:c:\\fo\\b", "file", nullptr, nullptr, nullptr, -1, "c:\\fo\\b", nullptr, nullptr},
+{"file:/c:\\foo/bar", "file", nullptr, nullptr, nullptr, -1, "/c:\\foo/bar",nullptr, nullptr},
+{"file://c:/f\\b", "file", nullptr, nullptr, nullptr, -1, "/c:/f\\b", nullptr, nullptr},
+{"file:///C:/foo", "file", nullptr, nullptr, nullptr, -1, "/C:/foo", nullptr, nullptr},
+{"file://///\\/\\/c:\\f\\b", "file", nullptr, nullptr, nullptr, -1, "/c:\\f\\b", nullptr, nullptr},
// If there is not a drive letter, we should treat is as UNC EXCEPT for
// three slashes, which we treat as a Unix style path.
-{"file:server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL},
-{"file:/server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL},
-{"file://server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL},
-{"file:///server/file", "file", NULL, NULL, NULL, -1, "/server/file",NULL, NULL},
-{"file://\\server/file", "file", NULL, NULL, NULL, -1, "\\server/file",NULL, NULL},
-{"file:////server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL},
+{"file:server/file", "file", nullptr, nullptr, "server", -1, "/file", nullptr, nullptr},
+{"file:/server/file", "file", nullptr, nullptr, "server", -1, "/file", nullptr, nullptr},
+{"file://server/file", "file", nullptr, nullptr, "server", -1, "/file", nullptr, nullptr},
+{"file:///server/file", "file", nullptr, nullptr, nullptr, -1, "/server/file",nullptr, nullptr},
+{"file://\\server/file", "file", nullptr, nullptr, nullptr, -1, "\\server/file",nullptr, nullptr},
+{"file:////server/file", "file", nullptr, nullptr, "server", -1, "/file", nullptr, nullptr},
// Queries and refs are valid for file URLs as well.
-{"file:///C:/foo.html?#", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "", ""},
-{"file:///C:/foo.html?query=yes#ref", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "query=yes", "ref"},
+{"file:///C:/foo.html?#", "file", nullptr, nullptr, nullptr, -1, "/C:/foo.html", "", ""},
+{"file:///C:/foo.html?query=yes#ref", "file", nullptr, nullptr, nullptr, -1, "/C:/foo.html", "query=yes", "ref"},
#else // WIN32
// No slashes.
- {"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
- {"file:path", "file", NULL, NULL, NULL, -1, "path", NULL, NULL},
- {"file:path/", "file", NULL, NULL, NULL, -1, "path/", NULL, NULL},
- {"file:path/f.txt", "file", NULL, NULL, NULL, -1, "path/f.txt", NULL, NULL},
+ {"file:", "file", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+ {"file:path", "file", nullptr, nullptr, nullptr, -1, "path", nullptr, nullptr},
+ {"file:path/", "file", nullptr, nullptr, nullptr, -1, "path/", nullptr, nullptr},
+ {"file:path/f.txt", "file", nullptr, nullptr, nullptr, -1, "path/f.txt", nullptr, nullptr},
// One slash.
- {"file:/", "file", NULL, NULL, NULL, -1, "/", NULL, NULL},
- {"file:/path", "file", NULL, NULL, NULL, -1, "/path", NULL, NULL},
- {"file:/path/", "file", NULL, NULL, NULL, -1, "/path/", NULL, NULL},
- {"file:/path/f.txt", "file", NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL},
+ {"file:/", "file", nullptr, nullptr, nullptr, -1, "/", nullptr, nullptr},
+ {"file:/path", "file", nullptr, nullptr, nullptr, -1, "/path", nullptr, nullptr},
+ {"file:/path/", "file", nullptr, nullptr, nullptr, -1, "/path/", nullptr, nullptr},
+ {"file:/path/f.txt", "file", nullptr, nullptr, nullptr, -1, "/path/f.txt", nullptr, nullptr},
// Two slashes.
- {"file://", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
- {"file://server", "file", NULL, NULL, "server", -1, NULL, NULL, NULL},
- {"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL},
- {"file://server/f.txt", "file", NULL, NULL, "server", -1, "/f.txt", NULL, NULL},
+ {"file://", "file", nullptr, nullptr, nullptr, -1, nullptr, nullptr, nullptr},
+ {"file://server", "file", nullptr, nullptr, "server", -1, nullptr, nullptr, nullptr},
+ {"file://server/", "file", nullptr, nullptr, "server", -1, "/", nullptr, nullptr},
+ {"file://server/f.txt", "file", nullptr, nullptr, "server", -1, "/f.txt", nullptr, nullptr},
// Three slashes.
- {"file:///", "file", NULL, NULL, NULL, -1, "/", NULL, NULL},
- {"file:///path", "file", NULL, NULL, NULL, -1, "/path", NULL, NULL},
- {"file:///path/", "file", NULL, NULL, NULL, -1, "/path/", NULL, NULL},
- {"file:///path/f.txt", "file", NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL},
+ {"file:///", "file", nullptr, nullptr, nullptr, -1, "/", nullptr, nullptr},
+ {"file:///path", "file", nullptr, nullptr, nullptr, -1, "/path", nullptr, nullptr},
+ {"file:///path/", "file", nullptr, nullptr, nullptr, -1, "/path/", nullptr, nullptr},
+ {"file:///path/f.txt", "file", nullptr, nullptr, nullptr, -1, "/path/f.txt", nullptr, nullptr},
// More than three slashes.
- {"file:////", "file", NULL, NULL, NULL, -1, "/", NULL, NULL},
- {"file:////path", "file", NULL, NULL, NULL, -1, "/path", NULL, NULL},
- {"file:////path/", "file", NULL, NULL, NULL, -1, "/path/", NULL, NULL},
- {"file:////path/f.txt", "file", NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL},
+ {"file:////", "file", nullptr, nullptr, nullptr, -1, "/", nullptr, nullptr},
+ {"file:////path", "file", nullptr, nullptr, nullptr, -1, "/path", nullptr, nullptr},
+ {"file:////path/", "file", nullptr, nullptr, nullptr, -1, "/path/", nullptr, nullptr},
+ {"file:////path/f.txt", "file", nullptr, nullptr, nullptr, -1, "/path/f.txt", nullptr, nullptr},
// Schemeless URLs
- {"path/f.txt", NULL, NULL, NULL, NULL, -1, "path/f.txt", NULL, NULL},
- {"path:80/f.txt", "path", NULL, NULL, NULL, -1, "80/f.txt", NULL, NULL},
- {"path/f.txt:80", "path/f.txt",NULL, NULL, NULL, -1, "80", NULL, NULL}, // Wrong.
- {"/path/f.txt", NULL, NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL},
- {"/path:80/f.txt", NULL, NULL, NULL, NULL, -1, "/path:80/f.txt",NULL, NULL},
- {"/path/f.txt:80", NULL, NULL, NULL, NULL, -1, "/path/f.txt:80",NULL, NULL},
- {"//server/f.txt", NULL, NULL, NULL, "server", -1, "/f.txt", NULL, NULL},
- {"//server:80/f.txt", NULL, NULL, NULL, "server:80",-1, "/f.txt", NULL, NULL},
- {"//server/f.txt:80", NULL, NULL, NULL, "server", -1, "/f.txt:80", NULL, NULL},
- {"///path/f.txt", NULL, NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL},
- {"///path:80/f.txt", NULL, NULL, NULL, NULL, -1, "/path:80/f.txt",NULL, NULL},
- {"///path/f.txt:80", NULL, NULL, NULL, NULL, -1, "/path/f.txt:80",NULL, NULL},
- {"////path/f.txt", NULL, NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL},
- {"////path:80/f.txt", NULL, NULL, NULL, NULL, -1, "/path:80/f.txt",NULL, NULL},
- {"////path/f.txt:80", NULL, NULL, NULL, NULL, -1, "/path/f.txt:80",NULL, NULL},
+ {"path/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "path/f.txt", nullptr, nullptr},
+ {"path:80/f.txt", "path", nullptr, nullptr, nullptr, -1, "80/f.txt", nullptr, nullptr},
+ {"path/f.txt:80", "path/f.txt",nullptr, nullptr, nullptr,-1,"80", nullptr, nullptr}, // Wrong.
+ {"/path/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "/path/f.txt", nullptr, nullptr},
+ {"/path:80/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "/path:80/f.txt",nullptr, nullptr},
+ {"/path/f.txt:80", nullptr,nullptr, nullptr, nullptr, -1, "/path/f.txt:80",nullptr, nullptr},
+ {"//server/f.txt", nullptr,nullptr, nullptr, "server", -1, "/f.txt", nullptr, nullptr},
+ {"//server:80/f.txt", nullptr,nullptr, nullptr, "server:80",-1, "/f.txt", nullptr, nullptr},
+ {"//server/f.txt:80", nullptr,nullptr, nullptr, "server", -1, "/f.txt:80", nullptr, nullptr},
+ {"///path/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "/path/f.txt", nullptr, nullptr},
+ {"///path:80/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "/path:80/f.txt",nullptr, nullptr},
+ {"///path/f.txt:80", nullptr,nullptr, nullptr, nullptr, -1, "/path/f.txt:80",nullptr, nullptr},
+ {"////path/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "/path/f.txt", nullptr, nullptr},
+ {"////path:80/f.txt", nullptr,nullptr, nullptr, nullptr, -1, "/path:80/f.txt",nullptr, nullptr},
+ {"////path/f.txt:80", nullptr,nullptr, nullptr, nullptr, -1, "/path/f.txt:80",nullptr, nullptr},
// Queries and refs are valid for file URLs as well.
- {"file:///foo.html?#", "file", NULL, NULL, NULL, -1, "/foo.html", "", ""},
- {"file:///foo.html?q=y#ref", "file", NULL, NULL, NULL, -1, "/foo.html", "q=y", "ref"},
+ {"file:///foo.html?#", "file", nullptr, nullptr, nullptr, -1, "/foo.html", "", ""},
+ {"file:///foo.html?q=y#ref", "file", nullptr, nullptr, nullptr, -1, "/foo.html", "q=y", "ref"},
#endif // WIN32
};
+// clang-format on
TEST(URLParser, ParseFileURL) {
// Declared outside for loop to try to catch cases in init() where we forget
@@ -506,8 +513,8 @@ TEST(URLParser, ExtractFileName) {
{"http://www.google.com/foo;bar;html", "foo"},
};
- for (size_t i = 0; i < std::size(extract_cases); i++) {
- const char* url = extract_cases[i].input;
+ for (const auto& extract_case : extract_cases) {
+ const char* url = extract_case.input;
int len = static_cast<int>(strlen(url));
Parsed parsed;
@@ -516,7 +523,7 @@ TEST(URLParser, ExtractFileName) {
Component file_name;
ExtractFileName(url, parsed.path, &file_name);
- EXPECT_TRUE(ComponentMatches(url, extract_cases[i].expected, file_name));
+ EXPECT_TRUE(ComponentMatches(url, extract_case.expected, file_name));
}
}
@@ -551,39 +558,39 @@ static bool NthParameterIs(const char* url,
return true;
}
}
- return expected_key == NULL; // We didn't find that many parameters.
+ return expected_key == nullptr; // We didn't find that many parameters.
}
TEST(URLParser, ExtractQueryKeyValue) {
- EXPECT_TRUE(NthParameterIs("http://www.google.com", 1, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs("http://www.google.com", 1, nullptr, nullptr));
// Basic case.
char a[] = "http://www.google.com?arg1=1&arg2=2&bar";
EXPECT_TRUE(NthParameterIs(a, 1, "arg1", "1"));
EXPECT_TRUE(NthParameterIs(a, 2, "arg2", "2"));
EXPECT_TRUE(NthParameterIs(a, 3, "bar", ""));
- EXPECT_TRUE(NthParameterIs(a, 4, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs(a, 4, nullptr, nullptr));
// Empty param at the end.
char b[] = "http://www.google.com?foo=bar&";
EXPECT_TRUE(NthParameterIs(b, 1, "foo", "bar"));
- EXPECT_TRUE(NthParameterIs(b, 2, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs(b, 2, nullptr, nullptr));
// Empty param at the beginning.
char c[] = "http://www.google.com?&foo=bar";
EXPECT_TRUE(NthParameterIs(c, 1, "", ""));
EXPECT_TRUE(NthParameterIs(c, 2, "foo", "bar"));
- EXPECT_TRUE(NthParameterIs(c, 3, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs(c, 3, nullptr, nullptr));
// Empty key with value.
char d[] = "http://www.google.com?=foo";
EXPECT_TRUE(NthParameterIs(d, 1, "", "foo"));
- EXPECT_TRUE(NthParameterIs(d, 2, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs(d, 2, nullptr, nullptr));
// Empty value with key.
char e[] = "http://www.google.com?foo=";
EXPECT_TRUE(NthParameterIs(e, 1, "foo", ""));
- EXPECT_TRUE(NthParameterIs(e, 2, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs(e, 2, nullptr, nullptr));
// Empty key and values.
char f[] = "http://www.google.com?&&==&=";
@@ -591,37 +598,39 @@ TEST(URLParser, ExtractQueryKeyValue) {
EXPECT_TRUE(NthParameterIs(f, 2, "", ""));
EXPECT_TRUE(NthParameterIs(f, 3, "", "="));
EXPECT_TRUE(NthParameterIs(f, 4, "", ""));
- EXPECT_TRUE(NthParameterIs(f, 5, NULL, NULL));
+ EXPECT_TRUE(NthParameterIs(f, 5, nullptr, nullptr));
}
// MailtoURL --------------------------------------------------------------------
+// clang-format off
static MailtoURLParseCase mailto_cases[] = {
//|input |scheme |path |query
-{"mailto:foo@gmail.com", "mailto", "foo@gmail.com", NULL},
-{" mailto: to \t", "mailto", " to", NULL},
-{"mailto:addr1%2C%20addr2 ", "mailto", "addr1%2C%20addr2", NULL},
-{"Mailto:addr1, addr2 ", "Mailto", "addr1, addr2", NULL},
-{"mailto:addr1:addr2 ", "mailto", "addr1:addr2", NULL},
-{"mailto:?to=addr1,addr2", "mailto", NULL, "to=addr1,addr2"},
-{"mailto:?to=addr1%2C%20addr2", "mailto", NULL, "to=addr1%2C%20addr2"},
+{"mailto:foo@gmail.com", "mailto", "foo@gmail.com", nullptr},
+{" mailto: to \t", "mailto", " to", nullptr},
+{"mailto:addr1%2C%20addr2 ", "mailto", "addr1%2C%20addr2", nullptr},
+{"Mailto:addr1, addr2 ", "Mailto", "addr1, addr2", nullptr},
+{"mailto:addr1:addr2 ", "mailto", "addr1:addr2", nullptr},
+{"mailto:?to=addr1,addr2", "mailto", nullptr, "to=addr1,addr2"},
+{"mailto:?to=addr1%2C%20addr2", "mailto", nullptr, "to=addr1%2C%20addr2"},
{"mailto:addr1?to=addr2", "mailto", "addr1", "to=addr2"},
-{"mailto:?body=#foobar#", "mailto", NULL, "body=#foobar#",},
+{"mailto:?body=#foobar#", "mailto", nullptr, "body=#foobar#",},
{"mailto:#?body=#foobar#", "mailto", "#", "body=#foobar#"},
};
+// clang-format on
TEST(URLParser, MailtoUrl) {
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < std::size(mailto_cases); ++i) {
- const char* url = mailto_cases[i].input;
+ for (const auto& mailto_case : mailto_cases) {
+ const char* url = mailto_case.input;
ParseMailtoURL(url, static_cast<int>(strlen(url)), &parsed);
int port = ParsePort(url, parsed.port);
- EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].scheme, parsed.scheme));
- EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].path, parsed.path));
- EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query));
+ EXPECT_TRUE(ComponentMatches(url, mailto_case.scheme, parsed.scheme));
+ EXPECT_TRUE(ComponentMatches(url, mailto_case.path, parsed.path));
+ EXPECT_TRUE(ComponentMatches(url, mailto_case.query, parsed.query));
EXPECT_EQ(PORT_UNSPECIFIED, port);
// The remaining components are never used for mailto URLs.
@@ -634,46 +643,50 @@ TEST(URLParser, MailtoUrl) {
// Various incarnations of filesystem URLs.
static FileSystemURLParseCase filesystem_cases[] = {
- // Regular URL with all the parts
-{"filesystem:http://user:pass@foo:21/temporary/bar;par?b#c", "http", "user", "pass", "foo", 21, "/temporary", "/bar;par", "b", "c"},
-{"filesystem:https://foo/persistent/bar;par/", "https", NULL, NULL, "foo", -1, "/persistent", "/bar;par/", NULL, NULL},
-{"filesystem:file:///persistent/bar;par/", "file", NULL, NULL, NULL, -1, "/persistent", "/bar;par/", NULL, NULL},
-{"filesystem:file:///persistent/bar;par/?query#ref", "file", NULL, NULL, NULL, -1, "/persistent", "/bar;par/", "query", "ref"},
-{"filesystem:file:///persistent", "file", NULL, NULL, NULL, -1, "/persistent", "", NULL, NULL},
+ // Regular URL with all the parts
+ {"filesystem:http://user:pass@foo:21/temporary/bar;par?b#c", "http", "user",
+ "pass", "foo", 21, "/temporary", "/bar;par", "b", "c"},
+ {"filesystem:https://foo/persistent/bar;par/", "https", nullptr, nullptr,
+ "foo", -1, "/persistent", "/bar;par/", nullptr, nullptr},
+ {"filesystem:file:///persistent/bar;par/", "file", nullptr, nullptr,
+ nullptr, -1, "/persistent", "/bar;par/", nullptr, nullptr},
+ {"filesystem:file:///persistent/bar;par/?query#ref", "file", nullptr,
+ nullptr, nullptr, -1, "/persistent", "/bar;par/", "query", "ref"},
+ {"filesystem:file:///persistent", "file", nullptr, nullptr, nullptr, -1,
+ "/persistent", "", nullptr, nullptr},
};
TEST(URLParser, FileSystemURL) {
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < std::size(filesystem_cases); i++) {
- const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
- const char* url = parsecase->input;
+ for (const auto& filesystem_case : filesystem_cases) {
+ const char* url = filesystem_case.input;
ParseFileSystemURL(url, static_cast<int>(strlen(url)), &parsed);
EXPECT_TRUE(ComponentMatches(url, "filesystem", parsed.scheme));
- EXPECT_EQ(!parsecase->inner_scheme, !parsed.inner_parsed());
+ EXPECT_EQ(!filesystem_case.inner_scheme, !parsed.inner_parsed());
// Only check the inner_parsed if there is one.
if (parsed.inner_parsed()) {
- EXPECT_TRUE(ComponentMatches(url, parsecase->inner_scheme,
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.inner_scheme,
parsed.inner_parsed()->scheme));
- EXPECT_TRUE(ComponentMatches(url, parsecase->inner_username,
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.inner_username,
parsed.inner_parsed()->username));
- EXPECT_TRUE(ComponentMatches(url, parsecase->inner_password,
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.inner_password,
parsed.inner_parsed()->password));
- EXPECT_TRUE(ComponentMatches(url, parsecase->inner_host,
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.inner_host,
parsed.inner_parsed()->host));
int port = ParsePort(url, parsed.inner_parsed()->port);
- EXPECT_EQ(parsecase->inner_port, port);
+ EXPECT_EQ(filesystem_case.inner_port, port);
// The remaining components are never used for filesystem URLs.
ExpectInvalidComponent(parsed.inner_parsed()->query);
ExpectInvalidComponent(parsed.inner_parsed()->ref);
}
- EXPECT_TRUE(ComponentMatches(url, parsecase->path, parsed.path));
- EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query));
- EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref));
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.path, parsed.path));
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.query, parsed.query));
+ EXPECT_TRUE(ComponentMatches(url, filesystem_case.ref, parsed.ref));
// The remaining components are never used for filesystem URLs.
ExpectInvalidComponent(parsed.username);
diff --git a/url/url_util.cc b/url/url_util.cc
index 001c50e72..9258cfcfa 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -147,18 +147,6 @@ enum WhitespaceRemovalPolicy {
DO_NOT_REMOVE_WHITESPACE,
};
-// This template converts a given character type to the corresponding
-// StringPiece type.
-template<typename CHAR> struct CharToStringPiece {
-};
-template<> struct CharToStringPiece<char> {
- typedef base::StringPiece Piece;
-};
-template <>
-struct CharToStringPiece<char16_t> {
- typedef base::StringPiece16 Piece;
-};
-
// Given a string and a range inside the string, compares it to the given
// lower-case |compare_to| buffer.
template<typename CHAR>
@@ -168,8 +156,7 @@ inline bool DoCompareSchemeComponent(const CHAR* spec,
if (component.is_empty())
return compare_to[0] == 0; // When component is empty, match empty scheme.
return base::EqualsCaseInsensitiveASCII(
- typename CharToStringPiece<CHAR>::Piece(&spec[component.begin],
- component.len),
+ std::basic_string_view(&spec[component.begin], component.len),
compare_to);
}
@@ -185,8 +172,7 @@ bool DoIsInSchemes(const CHAR* spec,
for (const SchemeWithType& scheme_with_type : schemes) {
if (base::EqualsCaseInsensitiveASCII(
- typename CharToStringPiece<CHAR>::Piece(&spec[scheme.begin],
- scheme.len),
+ std::basic_string_view(&spec[scheme.begin], scheme.len),
scheme_with_type.scheme)) {
*type = scheme_with_type.type;
return true;
@@ -735,8 +721,8 @@ bool FindAndCompareScheme(const char16_t* str,
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
-bool DomainIs(base::StringPiece canonical_host,
- base::StringPiece canonical_domain) {
+bool DomainIs(std::string_view canonical_host,
+ std::string_view canonical_domain) {
if (canonical_host.empty() || canonical_domain.empty())
return false;
@@ -754,7 +740,7 @@ bool DomainIs(base::StringPiece canonical_host,
const char* host_first_pos =
canonical_host.data() + host_len - canonical_domain.length();
- if (base::StringPiece(host_first_pos, canonical_domain.length()) !=
+ if (std::string_view(host_first_pos, canonical_domain.length()) !=
canonical_domain) {
return false;
}
@@ -771,7 +757,7 @@ bool DomainIs(base::StringPiece canonical_host,
return true;
}
-bool HostIsIPAddress(base::StringPiece host) {
+bool HostIsIPAddress(std::string_view host) {
STACK_UNINITIALIZED url::RawCanonOutputT<char, 128> ignored_output;
url::CanonHostInfo host_info;
url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
@@ -847,19 +833,18 @@ bool ReplaceComponents(const char* spec,
charset_converter, output, out_parsed);
}
-void DecodeURLEscapeSequences(const char* input,
- int length,
+void DecodeURLEscapeSequences(std::string_view input,
DecodeURLMode mode,
CanonOutputW* output) {
- if (length <= 0)
+ if (input.empty()) {
return;
+ }
STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
- size_t length_size_t = static_cast<size_t>(length);
- for (size_t i = 0; i < length_size_t; i++) {
+ for (size_t i = 0; i < input.length(); i++) {
if (input[i] == '%') {
unsigned char ch;
- if (DecodeEscaped(input, &i, length_size_t, &ch)) {
+ if (DecodeEscaped(input.data(), &i, input.length(), &ch)) {
unescaped_chars.push_back(ch);
} else {
// Invalid escape sequence, copy the percent literal.
@@ -908,16 +893,20 @@ void DecodeURLEscapeSequences(const char* input,
}
}
-void EncodeURIComponent(const char* input, int length, CanonOutput* output) {
- for (int i = 0; i < length; ++i) {
- unsigned char c = static_cast<unsigned char>(input[i]);
- if (IsComponentChar(c))
+void EncodeURIComponent(std::string_view input, CanonOutput* output) {
+ for (unsigned char c : input) {
+ if (IsComponentChar(c)) {
output->push_back(c);
- else
+ } else {
AppendEscapedChar(c, output);
+ }
}
}
+bool IsURIComponentChar(char c) {
+ return IsComponentChar(c);
+}
+
bool CompareSchemeComponent(const char* spec,
const Component& component,
const char* compare_to) {
@@ -930,4 +919,16 @@ bool CompareSchemeComponent(const char16_t* spec,
return DoCompareSchemeComponent(spec, component, compare_to);
}
+bool HasInvalidURLEscapeSequences(std::string_view input) {
+ for (size_t i = 0; i < input.size(); i++) {
+ if (input[i] == '%') {
+ unsigned char ch;
+ if (!DecodeEscaped(input.data(), &i, input.size(), &ch)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
} // namespace url
diff --git a/url/url_util.h b/url/url_util.h
index 670552a8c..8c94c7a4f 100644
--- a/url/url_util.h
+++ b/url/url_util.h
@@ -7,10 +7,10 @@
#include <memory>
#include <string>
+#include <string_view>
#include <vector>
#include "base/component_export.h"
-#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
@@ -203,12 +203,12 @@ bool GetStandardSchemeType(const char16_t* spec,
// input domain should match host canonicalization rules. i.e. it should be
// lowercase except for escape chars.
COMPONENT_EXPORT(URL)
-bool DomainIs(base::StringPiece canonical_host,
- base::StringPiece canonical_domain);
+bool DomainIs(std::string_view canonical_host,
+ std::string_view canonical_domain);
// Returns true if the hostname is an IP address. Note: this function isn't very
// cheap, as it must re-parse the host to verify.
-COMPONENT_EXPORT(URL) bool HostIsIPAddress(base::StringPiece host);
+COMPONENT_EXPORT(URL) bool HostIsIPAddress(std::string_view host);
// URL library wrappers --------------------------------------------------------
@@ -299,15 +299,29 @@ enum class DecodeURLMode {
// Unescapes the given string using URL escaping rules.
COMPONENT_EXPORT(URL)
-void DecodeURLEscapeSequences(const char* input,
- int length,
+void DecodeURLEscapeSequences(std::string_view input,
DecodeURLMode mode,
CanonOutputW* output);
// Escapes the given string as defined by the JS method encodeURIComponent. See
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
COMPONENT_EXPORT(URL)
-void EncodeURIComponent(const char* input, int length, CanonOutput* output);
+void EncodeURIComponent(std::string_view input, CanonOutput* output);
+
+// Returns true if `c` is a character that does not require escaping in
+// encodeURIComponent.
+// TODO(crbug.com/1481056): Remove this when event-level reportEvent is removed
+// (if it is still this function's only consumer).
+COMPONENT_EXPORT(URL)
+bool IsURIComponentChar(char c);
+
+// Checks an arbitrary string for invalid escape sequences.
+//
+// A valid percent-encoding is '%' followed by exactly two hex-digits. This
+// function returns true if an occurrence of '%' is found and followed by
+// anything other than two hex-digits.
+COMPONENT_EXPORT(URL)
+bool HasInvalidURLEscapeSequences(std::string_view input);
} // namespace url
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index 3a2e6e6d5..300887a01 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -6,7 +6,8 @@
#include <stddef.h>
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest-message.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -36,8 +37,8 @@ TEST_F(URLUtilTest, FindAndCompareScheme) {
// Simple case where the scheme is found and matches.
const char kStr1[] = "http://www.com/";
- EXPECT_TRUE(FindAndCompareScheme(
- kStr1, static_cast<int>(strlen(kStr1)), "http", NULL));
+ EXPECT_TRUE(FindAndCompareScheme(kStr1, static_cast<int>(strlen(kStr1)),
+ "http", nullptr));
EXPECT_TRUE(FindAndCompareScheme(
kStr1, static_cast<int>(strlen(kStr1)), "http", &found_scheme));
EXPECT_TRUE(found_scheme == Component(0, 4));
@@ -159,18 +160,22 @@ TEST_F(URLUtilTest, ReplaceComponents) {
// Check that the following calls do not cause crash
Replacements<char> replacements;
replacements.SetRef("test", Component(0, 4));
- ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed);
- ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed);
+ ReplaceComponents(nullptr, 0, parsed, replacements, nullptr, &output,
+ &new_parsed);
+ ReplaceComponents("", 0, parsed, replacements, nullptr, &output, &new_parsed);
replacements.ClearRef();
replacements.SetHost("test", Component(0, 4));
- ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed);
- ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed);
+ ReplaceComponents(nullptr, 0, parsed, replacements, nullptr, &output,
+ &new_parsed);
+ ReplaceComponents("", 0, parsed, replacements, nullptr, &output, &new_parsed);
replacements.ClearHost();
- ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed);
- ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed);
- ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed);
- ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed);
+ ReplaceComponents(nullptr, 0, parsed, replacements, nullptr, &output,
+ &new_parsed);
+ ReplaceComponents("", 0, parsed, replacements, nullptr, &output, &new_parsed);
+ ReplaceComponents(nullptr, 0, parsed, replacements, nullptr, &output,
+ &new_parsed);
+ ReplaceComponents("", 0, parsed, replacements, nullptr, &output, &new_parsed);
}
static std::string CheckReplaceScheme(const char* base_url,
@@ -178,7 +183,7 @@ static std::string CheckReplaceScheme(const char* base_url,
// Make sure the input is canonicalized.
RawCanonOutput<32> original;
Parsed original_parsed;
- Canonicalize(base_url, strlen(base_url), true, NULL, &original,
+ Canonicalize(base_url, strlen(base_url), true, nullptr, &original,
&original_parsed);
Replacements<char> replacements;
@@ -188,7 +193,7 @@ static std::string CheckReplaceScheme(const char* base_url,
StdStringCanonOutput output(&output_string);
Parsed output_parsed;
ReplaceComponents(original.data(), original.length(), original_parsed,
- replacements, NULL, &output, &output_parsed);
+ replacements, nullptr, &output, &output_parsed);
output.Complete();
return output_string;
@@ -256,18 +261,17 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
{"%ef%bf%bd", "\xef\xbf\xbd"},
};
- for (size_t i = 0; i < std::size(decode_cases); i++) {
- const char* input = decode_cases[i].input;
+ for (const auto& decode_case : decode_cases) {
RawCanonOutputT<char16_t> output;
- DecodeURLEscapeSequences(input, strlen(input),
+ DecodeURLEscapeSequences(decode_case.input,
DecodeURLMode::kUTF8OrIsomorphic, &output);
- EXPECT_EQ(decode_cases[i].output, base::UTF16ToUTF8(std::u16string(
- output.data(), output.length())));
+ EXPECT_EQ(decode_case.output, base::UTF16ToUTF8(std::u16string(
+ output.data(), output.length())));
RawCanonOutputT<char16_t> output_utf8;
- DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
+ DecodeURLEscapeSequences(decode_case.input, DecodeURLMode::kUTF8,
&output_utf8);
- EXPECT_EQ(decode_cases[i].output,
+ EXPECT_EQ(decode_case.output,
base::UTF16ToUTF8(
std::u16string(output_utf8.data(), output_utf8.length())));
}
@@ -275,8 +279,7 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
// Our decode should decode %00
const char zero_input[] = "%00";
RawCanonOutputT<char16_t> zero_output;
- DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8,
- &zero_output);
+ DecodeURLEscapeSequences(zero_input, DecodeURLMode::kUTF8, &zero_output);
EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(zero_output.data(),
zero_output.length())));
@@ -298,18 +301,17 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
{0xfffd, 0xfffd, 0}},
};
- for (const auto& test : utf8_decode_cases) {
- const char* input = test.input;
+ for (const auto& utf8_decode_case : utf8_decode_cases) {
RawCanonOutputT<char16_t> output_iso;
- DecodeURLEscapeSequences(input, strlen(input),
+ DecodeURLEscapeSequences(utf8_decode_case.input,
DecodeURLMode::kUTF8OrIsomorphic, &output_iso);
- EXPECT_EQ(std::u16string(test.expected_iso.data()),
+ EXPECT_EQ(std::u16string(utf8_decode_case.expected_iso.data()),
std::u16string(output_iso.data(), output_iso.length()));
RawCanonOutputT<char16_t> output_utf8;
- DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
+ DecodeURLEscapeSequences(utf8_decode_case.input, DecodeURLMode::kUTF8,
&output_utf8);
- EXPECT_EQ(std::u16string(test.expected_utf8.data()),
+ EXPECT_EQ(std::u16string(utf8_decode_case.expected_utf8.data()),
std::u16string(output_utf8.data(), output_utf8.length()));
}
}
@@ -338,12 +340,11 @@ TEST_F(URLUtilTest, TestEncodeURIComponent) {
"pqrstuvwxyz%7B%7C%7D~%7F"},
};
- for (size_t i = 0; i < std::size(encode_cases); i++) {
- const char* input = encode_cases[i].input;
+ for (const auto& encode_case : encode_cases) {
RawCanonOutputT<char> buffer;
- EncodeURIComponent(input, strlen(input), &buffer);
+ EncodeURIComponent(encode_case.input, &buffer);
std::string output(buffer.data(), buffer.length());
- EXPECT_EQ(encode_cases[i].output, output);
+ EXPECT_EQ(encode_case.output, output);
}
}
@@ -416,23 +417,25 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
// adding the requested dot doesn't seem wrong either.
{"aaa://a\\", "aaa:.", true, "aaa://a\\."}};
- for (size_t i = 0; i < std::size(resolve_non_standard_cases); i++) {
- const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];
+ for (const auto& test : resolve_non_standard_cases) {
+ SCOPED_TRACE(testing::Message()
+ << "base: " << test.base << ", rel: " << test.rel);
+
Parsed base_parsed;
- ParsePathURL(test_data.base, strlen(test_data.base), false, &base_parsed);
+ ParsePathURL(test.base, strlen(test.base), false, &base_parsed);
std::string resolved;
StdStringCanonOutput output(&resolved);
Parsed resolved_parsed;
- bool valid = ResolveRelative(test_data.base, strlen(test_data.base),
- base_parsed, test_data.rel,
- strlen(test_data.rel), NULL, &output,
- &resolved_parsed);
+ bool valid =
+ ResolveRelative(test.base, strlen(test.base), base_parsed, test.rel,
+ strlen(test.rel), nullptr, &output, &resolved_parsed);
output.Complete();
- EXPECT_EQ(test_data.is_valid, valid) << i;
- if (test_data.is_valid && valid)
- EXPECT_EQ(test_data.out, resolved) << i;
+ EXPECT_EQ(test.is_valid, valid);
+ if (test.is_valid && valid) {
+ EXPECT_EQ(test.out, resolved);
+ }
}
}
@@ -449,10 +452,8 @@ TEST_F(URLUtilTest, TestNoRefComponent) {
StdStringCanonOutput output(&resolved);
Parsed resolved_parsed;
- bool valid = ResolveRelative(base, strlen(base),
- base_parsed, rel,
- strlen(rel), NULL, &output,
- &resolved_parsed);
+ bool valid = ResolveRelative(base, strlen(base), base_parsed, rel,
+ strlen(rel), nullptr, &output, &resolved_parsed);
EXPECT_TRUE(valid);
EXPECT_FALSE(resolved_parsed.ref.is_valid());
}
@@ -495,7 +496,7 @@ TEST_F(URLUtilTest, PotentiallyDanglingMarkup) {
Parsed resolved_parsed;
bool valid =
ResolveRelative(test.base, strlen(test.base), base_parsed, test.rel,
- strlen(test.rel), NULL, &output, &resolved_parsed);
+ strlen(test.rel), nullptr, &output, &resolved_parsed);
ASSERT_TRUE(valid);
output.Complete();
@@ -588,7 +589,7 @@ TEST_F(URLUtilTest, TestDomainIs) {
}
namespace {
-absl::optional<std::string> CanonicalizeSpec(base::StringPiece spec,
+absl::optional<std::string> CanonicalizeSpec(std::string_view spec,
bool trim_path_end) {
std::string canonicalized;
StdStringCanonOutput output(&canonicalized);
@@ -635,4 +636,87 @@ TEST_F(URLUtilTest, TestCanonicalizeIdempotencyWithLeadingControlCharacters) {
}
}
+TEST_F(URLUtilTest, TestHasInvalidURLEscapeSequences) {
+ struct TestCase {
+ const char* input;
+ bool is_invalid;
+ } cases[] = {
+ // Edge cases.
+ {"", false},
+ {"%", true},
+
+ // Single regular chars with no escaping are valid.
+ {"a", false},
+ {"g", false},
+ {"A", false},
+ {"G", false},
+ {":", false},
+ {"]", false},
+ {"\x00", false}, // ASCII 'NUL' char
+ {"\x01", false}, // ASCII 'SOH' char
+ {"\xC2\xA3", false}, // UTF-8 encoded '£'.
+
+ // Longer strings without escaping are valid.
+ {"Hello world", false},
+ {"Here: [%25] <-- a percent-encoded percent character.", false},
+
+ // Valid %-escaped sequences ('%' followed by two hex digits).
+ {"%00", false},
+ {"%20", false},
+ {"%02", false},
+ {"%ff", false},
+ {"%FF", false},
+ {"%0a", false},
+ {"%0A", false},
+ {"abc%FF", false},
+ {"%FFabc", false},
+ {"abc%FFabc", false},
+ {"hello %FF world", false},
+ {"%20hello%20world", false},
+ {"%25", false},
+ {"%25%25", false},
+ {"%250", false},
+ {"%259", false},
+ {"%25A", false},
+ {"%25F", false},
+ {"%0a:", false},
+
+ // '%' followed by a single character is never a valid sequence.
+ {"%%", true},
+ {"%2", true},
+ {"%a", true},
+ {"%A", true},
+ {"%g", true},
+ {"%G", true},
+ {"%:", true},
+ {"%[", true},
+ {"%F", true},
+ {"%\xC2\xA3", true}, //% followed by UTF-8 encoded '£'.
+
+ // String ends on a potential escape sequence but without two hex-digits
+ // is invalid.
+ {"abc%", true},
+ {"abc%%", true},
+ {"abc%%%", true},
+ {"abc%a", true},
+
+ // One hex and one non-hex digit is invalid.
+ {"%a:", true},
+ {"%:a", true},
+ {"%::", true},
+ {"%ag", true},
+ {"%ga", true},
+ {"%-1", true},
+ {"%1-", true},
+ {"%0\xC2\xA3", true}, // %0£.
+ };
+
+ for (TestCase test_case : cases) {
+ const char* input = test_case.input;
+ bool result = HasInvalidURLEscapeSequences(input);
+ EXPECT_EQ(test_case.is_invalid, result)
+ << "Invalid result for '" << input << "'";
+ }
+}
+
} // namespace url