summary | refs | log | tree | commit | diff
path: root/url/url_canon_etc.cc
diff options
context:
space:
mode:
author: Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com> 2023-03-22 02:58:49 -0800
committer: Patrick Rohr <prohr@google.com> 2023-03-22 04:40:18 -0700
commit: c175721cfcc03e339122be17d569239df9762b2b (patch)
tree: 64fed42a909067904ef77b573e554a172241bdfe /url/url_canon_etc.cc
parent: 26b17131b27be4b84fc089d96dcc1998e686ecf9 (diff)
download: cronet-c175721cfcc03e339122be17d569239df9762b2b.tar.gz
Import Cronet version 110.0.5481.154
Project import generated by Copybara.
FolderOrigin-RevId: /tmp/copybara-origin/src
Test: none
Change-Id: I534a69efa61b40fdc95613bce5bc5dd9a432f646
Diffstat (limited to 'url/url_canon_etc.cc')
-rw-r--r-- url/url_canon_etc.cc 49
1 files changed, 30 insertions, 19 deletions
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index 695e8dd19..3d1cb938e 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc
@@ -31,12 +31,22 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input,
// Fast verification that there's nothing that needs removal. This is the 99%
// case, so we want it to be fast and don't care about impacting the speed
// when we do find whitespace.
- int found_whitespace = false;
- for (int i = 0; i < input_len; i++) {
- if (!IsRemovableURLWhitespace(input[i]))
- continue;
- found_whitespace = true;
- break;
+ bool found_whitespace = false;
+ if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) {
+ // For large strings, memchr is much faster than any scalar code we can
+ // write, even if we need to run it three times. (If this turns out to still
+ // be a bottleneck, we could write our own vector code, but given that
+ // memchr is so fast, it's unlikely to be relevant.)
+ found_whitespace = memchr(input, '\n', input_len) != nullptr ||
+ memchr(input, '\r', input_len) != nullptr ||
+ memchr(input, '\t', input_len) != nullptr;
+ } else {
+ for (int i = 0; i < input_len; i++) {
+ if (!IsRemovableURLWhitespace(input[i]))
+ continue;
+ found_whitespace = true;
+ break;
+ }
}
if (!found_whitespace) {
@@ -72,6 +82,7 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input,
// Contains the canonical version of each possible input letter in the scheme
// (basically, lower-cased). The corresponding entry will be 0 if the letter
// is not allowed in a scheme.
+// clang-format off
const char kSchemeCanonical[0x80] = {
// 00-1f: all are invalid
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -88,6 +99,7 @@ const char kSchemeCanonical[0x80] = {
0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// p q r s t u v w x y z { | } ~
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0 , 0 , 0 , 0 , 0 };
+// clang-format on
// This could be a table lookup as well by setting the high bit for each
// valid character, but it's only called once per URL, and it makes the lookup
@@ -96,12 +108,12 @@ inline bool IsSchemeFirstChar(unsigned char c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
bool DoScheme(const CHAR* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- if (!scheme.is_nonempty()) {
+ if (scheme.is_empty()) {
// Scheme is unspecified or empty, convert to empty by appending a colon.
*out_scheme = Component(output->length(), 0);
output->push_back(':');
@@ -161,7 +173,7 @@ bool DoScheme(const CHAR* spec,
// *_spec strings. Typically, these specs will be the same (we're
// canonicalizing a single source string), but may be different when
// replacing components.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
bool DoUserInfo(const CHAR* username_spec,
const Component& username,
const CHAR* password_spec,
@@ -169,7 +181,7 @@ bool DoUserInfo(const CHAR* username_spec,
CanonOutput* output,
Component* out_username,
Component* out_password) {
- if (username.len <= 0 && password.len <= 0) {
+ if (username.is_empty() && password.is_empty()) {
// Common case: no user info. We strip empty username/passwords.
*out_username = Component();
*out_password = Component();
@@ -178,7 +190,7 @@ bool DoUserInfo(const CHAR* username_spec,
// Write the username.
out_username->begin = output->length();
- if (username.len > 0) {
+ if (username.is_nonempty()) {
// This will escape characters not valid for the username.
AppendStringOfType(&username_spec[username.begin],
static_cast<size_t>(username.len), CHAR_USERINFO,
@@ -188,7 +200,7 @@ bool DoUserInfo(const CHAR* username_spec,
// When there is a password, we need the separator. Note that we strip
// empty but specified passwords.
- if (password.len > 0) {
+ if (password.is_nonempty()) {
output->push_back(':');
out_password->begin = output->length();
AppendStringOfType(&password_spec[password.begin],
@@ -209,7 +221,7 @@ inline void WritePortInt(char* output, int output_len, int port) {
}
// This function will prepend the colon if there will be a port.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
bool DoPort(const CHAR* spec,
const Component& port,
int default_port_for_scheme,
@@ -284,7 +296,7 @@ const bool kShouldEscapeCharInFragment[0x80] = {
};
// clang-format on
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
void DoCanonicalizeRef(const CHAR* spec,
const Component& ref,
CanonOutput* output,
@@ -364,9 +376,9 @@ bool CanonicalizeUserInfo(const char* username_source,
CanonOutput* output,
Component* out_username,
Component* out_password) {
- return DoUserInfo<char, unsigned char>(
- username_source, username, password_source, password,
- output, out_username, out_password);
+ return DoUserInfo<char, unsigned char>(username_source, username,
+ password_source, password, output,
+ out_username, out_password);
}
bool CanonicalizeUserInfo(const char16_t* username_source,
@@ -386,8 +398,7 @@ bool CanonicalizePort(const char* spec,
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
- return DoPort<char, unsigned char>(spec, port,
- default_port_for_scheme,
+ return DoPort<char, unsigned char>(spec, port, default_port_for_scheme,
output, out_port);
}