Import Cronet version 110.0.5481.154

Project import generated by Copybara.

FolderOrigin-RevId: /tmp/copybara-origin/src
Test: none
Change-Id: I534a69efa61b40fdc95613bce5bc5dd9a432f646
author: Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com> 2023-03-22 02:58:49 -0800
committer: Patrick Rohr <prohr@google.com> 2023-03-22 04:40:18 -0700
commit: c175721cfcc03e339122be17d569239df9762b2b (patch)
tree: 64fed42a909067904ef77b573e554a172241bdfe /url/url_canon_etc.cc
parent: 26b17131b27be4b84fc089d96dcc1998e686ecf9 (diff)
download: cronet-c175721cfcc03e339122be17d569239df9762b2b.tar.gz
1 files changed, 30 insertions, 19 deletions
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index 695e8dd19..3d1cb938e 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc
@@ -31,12 +31,22 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input,
   // Fast verification that there's nothing that needs removal. This is the 99%
   // case, so we want it to be fast and don't care about impacting the speed
   // when we do find whitespace.
-  int found_whitespace = false;
-  for (int i = 0; i < input_len; i++) {
-    if (!IsRemovableURLWhitespace(input[i]))
-      continue;
-    found_whitespace = true;
-    break;
+  bool found_whitespace = false;
+  if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) {
+    // For large strings, memchr is much faster than any scalar code we can
+    // write, even if we need to run it three times. (If this turns out to still
+    // be a bottleneck, we could write our own vector code, but given that
+    // memchr is so fast, it's unlikely to be relevant.)
+    found_whitespace = memchr(input, '\n', input_len) != nullptr ||
+                       memchr(input, '\r', input_len) != nullptr ||
+                       memchr(input, '\t', input_len) != nullptr;
+  } else {
+    for (int i = 0; i < input_len; i++) {
+      if (!IsRemovableURLWhitespace(input[i]))
+        continue;
+      found_whitespace = true;
+      break;
+    }
   }
 
   if (!found_whitespace) {
@@ -72,6 +82,7 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input,
 // Contains the canonical version of each possible input letter in the scheme
 // (basically, lower-cased). The corresponding entry will be 0 if the letter
 // is not allowed in a scheme.
+// clang-format off
 const char kSchemeCanonical[0x80] = {
 // 00-1f: all are invalid
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
@@ -88,6 +99,7 @@ const char kSchemeCanonical[0x80] = {
      0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
 //   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0 ,  0 ,  0 ,  0 ,  0 };
+// clang-format on
 
 // This could be a table lookup as well by setting the high bit for each
 // valid character, but it's only called once per URL, and it makes the lookup
@@ -96,12 +108,12 @@ inline bool IsSchemeFirstChar(unsigned char c) {
   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
 }
 
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoScheme(const CHAR* spec,
               const Component& scheme,
               CanonOutput* output,
               Component* out_scheme) {
-  if (!scheme.is_nonempty()) {
+  if (scheme.is_empty()) {
     // Scheme is unspecified or empty, convert to empty by appending a colon.
     *out_scheme = Component(output->length(), 0);
     output->push_back(':');
@@ -161,7 +173,7 @@ bool DoScheme(const CHAR* spec,
 // *_spec strings. Typically, these specs will be the same (we're
 // canonicalizing a single source string), but may be different when
 // replacing components.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoUserInfo(const CHAR* username_spec,
                 const Component& username,
                 const CHAR* password_spec,
@@ -169,7 +181,7 @@ bool DoUserInfo(const CHAR* username_spec,
                 CanonOutput* output,
                 Component* out_username,
                 Component* out_password) {
-  if (username.len <= 0 && password.len <= 0) {
+  if (username.is_empty() && password.is_empty()) {
     // Common case: no user info. We strip empty username/passwords.
     *out_username = Component();
     *out_password = Component();
@@ -178,7 +190,7 @@ bool DoUserInfo(const CHAR* username_spec,
 
   // Write the username.
   out_username->begin = output->length();
-  if (username.len > 0) {
+  if (username.is_nonempty()) {
     // This will escape characters not valid for the username.
     AppendStringOfType(&username_spec[username.begin],
                        static_cast<size_t>(username.len), CHAR_USERINFO,
@@ -188,7 +200,7 @@ bool DoUserInfo(const CHAR* username_spec,
 
   // When there is a password, we need the separator. Note that we strip
   // empty but specified passwords.
-  if (password.len > 0) {
+  if (password.is_nonempty()) {
     output->push_back(':');
     out_password->begin = output->length();
     AppendStringOfType(&password_spec[password.begin],
@@ -209,7 +221,7 @@ inline void WritePortInt(char* output, int output_len, int port) {
 }
 
 // This function will prepend the colon if there will be a port.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoPort(const CHAR* spec,
             const Component& port,
             int default_port_for_scheme,
@@ -284,7 +296,7 @@ const bool kShouldEscapeCharInFragment[0x80] = {
 };
 // clang-format on
 
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 void DoCanonicalizeRef(const CHAR* spec,
                        const Component& ref,
                        CanonOutput* output,
@@ -364,9 +376,9 @@ bool CanonicalizeUserInfo(const char* username_source,
                           CanonOutput* output,
                           Component* out_username,
                           Component* out_password) {
-  return DoUserInfo<char, unsigned char>(
-      username_source, username, password_source, password,
-      output, out_username, out_password);
+  return DoUserInfo<char, unsigned char>(username_source, username,
+                                         password_source, password, output,
+                                         out_username, out_password);
 }
 
 bool CanonicalizeUserInfo(const char16_t* username_source,
@@ -386,8 +398,7 @@ bool CanonicalizePort(const char* spec,
                       int default_port_for_scheme,
                       CanonOutput* output,
                       Component* out_port) {
-  return DoPort<char, unsigned char>(spec, port,
-                                     default_port_for_scheme,
+  return DoPort<char, unsigned char>(spec, port, default_port_for_scheme,
                                      output, out_port);
 }
author	Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com>	2023-03-22 02:58:49 -0800
committer	Patrick Rohr <prohr@google.com>	2023-03-22 04:40:18 -0700
commit	c175721cfcc03e339122be17d569239df9762b2b (patch)
tree	64fed42a909067904ef77b573e554a172241bdfe /url/url_canon_etc.cc
parent	26b17131b27be4b84fc089d96dcc1998e686ecf9 (diff)
download	cronet-c175721cfcc03e339122be17d569239df9762b2b.tar.gz