diff options
author | Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com> | 2023-03-22 02:58:49 -0800 |
---|---|---|
committer | Patrick Rohr <prohr@google.com> | 2023-03-22 04:40:18 -0700 |
commit | c175721cfcc03e339122be17d569239df9762b2b (patch) | |
tree | 64fed42a909067904ef77b573e554a172241bdfe /url/url_canon_etc.cc | |
parent | 26b17131b27be4b84fc089d96dcc1998e686ecf9 (diff) | |
download | cronet-c175721cfcc03e339122be17d569239df9762b2b.tar.gz |
Import Cronet version 110.0.5481.154
Project import generated by Copybara.
FolderOrigin-RevId: /tmp/copybara-origin/src
Test: none
Change-Id: I534a69efa61b40fdc95613bce5bc5dd9a432f646
Diffstat (limited to 'url/url_canon_etc.cc')
-rw-r--r-- | url/url_canon_etc.cc | 49 |
1 files changed, 30 insertions, 19 deletions
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc index 695e8dd19..3d1cb938e 100644 --- a/url/url_canon_etc.cc +++ b/url/url_canon_etc.cc @@ -31,12 +31,22 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input, // Fast verification that there's nothing that needs removal. This is the 99% // case, so we want it to be fast and don't care about impacting the speed // when we do find whitespace. - int found_whitespace = false; - for (int i = 0; i < input_len; i++) { - if (!IsRemovableURLWhitespace(input[i])) - continue; - found_whitespace = true; - break; + bool found_whitespace = false; + if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) { + // For large strings, memchr is much faster than any scalar code we can + // write, even if we need to run it three times. (If this turns out to still + // be a bottleneck, we could write our own vector code, but given that + // memchr is so fast, it's unlikely to be relevant.) + found_whitespace = memchr(input, '\n', input_len) != nullptr || + memchr(input, '\r', input_len) != nullptr || + memchr(input, '\t', input_len) != nullptr; + } else { + for (int i = 0; i < input_len; i++) { + if (!IsRemovableURLWhitespace(input[i])) + continue; + found_whitespace = true; + break; + } } if (!found_whitespace) { @@ -72,6 +82,7 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input, // Contains the canonical version of each possible input letter in the scheme // (basically, lower-cased). The corresponding entry will be 0 if the letter // is not allowed in a scheme. +// clang-format off const char kSchemeCanonical[0x80] = { // 00-1f: all are invalid 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -88,6 +99,7 @@ const char kSchemeCanonical[0x80] = { 0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', // p q r s t u v w x y z { | } ~ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0 , 0 , 0 , 0 , 0 }; +// clang-format on // This could be a table lookup as well by setting the high bit for each // valid character, but it's only called once per URL, and it makes the lookup @@ -96,12 +108,12 @@ inline bool IsSchemeFirstChar(unsigned char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } -template<typename CHAR, typename UCHAR> +template <typename CHAR, typename UCHAR> bool DoScheme(const CHAR* spec, const Component& scheme, CanonOutput* output, Component* out_scheme) { - if (!scheme.is_nonempty()) { + if (scheme.is_empty()) { // Scheme is unspecified or empty, convert to empty by appending a colon. *out_scheme = Component(output->length(), 0); output->push_back(':'); @@ -161,7 +173,7 @@ bool DoScheme(const CHAR* spec, // *_spec strings. Typically, these specs will be the same (we're // canonicalizing a single source string), but may be different when // replacing components. -template<typename CHAR, typename UCHAR> +template <typename CHAR, typename UCHAR> bool DoUserInfo(const CHAR* username_spec, const Component& username, const CHAR* password_spec, @@ -169,7 +181,7 @@ bool DoUserInfo(const CHAR* username_spec, CanonOutput* output, Component* out_username, Component* out_password) { - if (username.len <= 0 && password.len <= 0) { + if (username.is_empty() && password.is_empty()) { // Common case: no user info. We strip empty username/passwords. *out_username = Component(); *out_password = Component(); @@ -178,7 +190,7 @@ bool DoUserInfo(const CHAR* username_spec, // Write the username. out_username->begin = output->length(); - if (username.len > 0) { + if (username.is_nonempty()) { // This will escape characters not valid for the username. AppendStringOfType(&username_spec[username.begin], static_cast<size_t>(username.len), CHAR_USERINFO, @@ -188,7 +200,7 @@ bool DoUserInfo(const CHAR* username_spec, // When there is a password, we need the separator. Note that we strip // empty but specified passwords. - if (password.len > 0) { + if (password.is_nonempty()) { output->push_back(':'); out_password->begin = output->length(); AppendStringOfType(&password_spec[password.begin], @@ -209,7 +221,7 @@ inline void WritePortInt(char* output, int output_len, int port) { } // This function will prepend the colon if there will be a port. -template<typename CHAR, typename UCHAR> +template <typename CHAR, typename UCHAR> bool DoPort(const CHAR* spec, const Component& port, int default_port_for_scheme, @@ -284,7 +296,7 @@ const bool kShouldEscapeCharInFragment[0x80] = { }; // clang-format on -template<typename CHAR, typename UCHAR> +template <typename CHAR, typename UCHAR> void DoCanonicalizeRef(const CHAR* spec, const Component& ref, CanonOutput* output, @@ -364,9 +376,9 @@ bool CanonicalizeUserInfo(const char* username_source, CanonOutput* output, Component* out_username, Component* out_password) { - return DoUserInfo<char, unsigned char>( - username_source, username, password_source, password, - output, out_username, out_password); + return DoUserInfo<char, unsigned char>(username_source, username, + password_source, password, output, + out_username, out_password); } bool CanonicalizeUserInfo(const char16_t* username_source, @@ -386,8 +398,7 @@ bool CanonicalizePort(const char* spec, int default_port_for_scheme, CanonOutput* output, Component* out_port) { - return DoPort<char, unsigned char>(spec, port, - default_port_for_scheme, + return DoPort<char, unsigned char>(spec, port, default_port_for_scheme, output, out_port); } |