diff options
Diffstat (limited to 'url/url_canon_path.cc')
-rw-r--r-- | url/url_canon_path.cc | 28 |
1 files changed, 13 insertions, 15 deletions
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc index 676468d5d..2cd84b409 100644 --- a/url/url_canon_path.cc +++ b/url/url_canon_path.cc @@ -6,6 +6,7 @@ #include "base/check.h" #include "base/check_op.h" +#include "base/metrics/histogram_functions.h" #include "third_party/abseil-cpp/absl/types/optional.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" @@ -37,25 +38,24 @@ enum CharacterFlags { // ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these // characters unescaped, they should just be copied. UNESCAPE = 4, - - // This character is disallowed in URLs. Note that the "special" bit is also - // set to trigger handling. - INVALID_BIT = 8, - INVALID = INVALID_BIT | SPECIAL, }; // This table contains one of the above flag values. Note some flags are more // than one bits because they also turn on the "special" flag. Special is the // only flag that may be combined with others. // -// This table is designed to match exactly what IE does with the characters. +// This table was used to be designed to match exactly what IE did with the +// characters, however, which doesn't comply with the URL Standard as of Jun +// 2023. See http://crbug.com/1400251 and http://crbug.com/1252531 for efforts +// to comply with the URL Standard. // // Dot is even more special, and the escaped version is handled specially by // IsDot. Therefore, we don't need the "escape" flag, and even the "unescape" // bit is never handled (we just need the "special") bit. +// clang-format off const unsigned char kPathCharLookup[0x100] = { // NULL control chars... - INVALID, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, + ESCAPE , ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, // control chars... ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, // ' ' ! " # $ % & ' ( ) * + , - . / @@ -79,6 +79,7 @@ const unsigned char kPathCharLookup[0x100] = { ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE}; +// clang-format on enum DotDisposition { // The given dot is just part of a filename and is not special. @@ -266,6 +267,7 @@ bool DoPartialPathInternal(const CHAR* spec, absl::optional<size_t> last_invalid_percent_index; bool success = true; + bool unescape_escaped_char = false; for (size_t i = static_cast<size_t>(path.begin); i < end; i++) { UCHAR uch = static_cast<UCHAR>(spec[i]); if (sizeof(CHAR) > 1 && uch >= 0x80) { @@ -333,6 +335,8 @@ bool DoPartialPathInternal(const CHAR* spec, if (unescaped_flags & UNESCAPE) { // This escaped value shouldn't be escaped. Try to copy it. + unescape_escaped_char = true; + output->push_back(unescaped_value); // If we just unescaped a value within 2 output characters of the // '%' from a previously-detected invalid escape sequence, we @@ -354,8 +358,6 @@ bool DoPartialPathInternal(const CHAR* spec, output->push_back('%'); output->push_back(static_cast<char>(spec[i - 1])); output->push_back(static_cast<char>(spec[i])); - if (unescaped_flags & INVALID_BIT) - success = false; } } else { // Invalid escape sequence. IE7+ rejects any URLs with such @@ -366,12 +368,6 @@ bool DoPartialPathInternal(const CHAR* spec, last_invalid_percent_index = output->length(); output->push_back('%'); } - - } else if (flags & INVALID_BIT) { - // For NULLs, etc. fail. - AppendEscapedChar(out_ch, output); - success = false; - } else if (flags & ESCAPE_BIT) { // This character should be escaped. AppendEscapedChar(out_ch, output); @@ -382,6 +378,8 @@ bool DoPartialPathInternal(const CHAR* spec, } } } + base::UmaHistogramBoolean("URL.Path.UnescapeEscapedChar", + unescape_escaped_char); return success; } |