From 168f7e285114554eb2ac9bc22343cca461355b50 Mon Sep 17 00:00:00 2001 From: Cronet Mainline Eng Date: Tue, 28 May 2024 13:59:50 +0900 Subject: Import Cronet version 122.0.6261.43 FolderOrigin-RevId: /tmp/copybara-origin/src Change-Id: Ifb7b548cde690e10cc102366bc538e744efa902b --- third_party/re2/src/.bazelrc | 2 -- third_party/re2/src/.github/workflows/ci-bazel.yml | 2 ++ third_party/re2/src/.github/workflows/ci-cmake.yml | 2 ++ third_party/re2/src/.github/workflows/ci.yml | 2 ++ third_party/re2/src/.github/workflows/pr.yml | 8 +++++ third_party/re2/src/.github/workflows/python.yml | 2 ++ third_party/re2/src/AUTHORS | 13 ------- third_party/re2/src/CONTRIBUTORS | 41 ---------------------- third_party/re2/src/MODULE.bazel | 12 +++++-- third_party/re2/src/doc/mksyntaxgo | 10 +++--- third_party/re2/src/python/BUILD.bazel | 36 +++++++++++++++++++ third_party/re2/src/python/setup.py | 13 ++++++- third_party/re2/src/re2/parse.cc | 23 +++++++++++- third_party/re2/src/re2/testing/parse_test.cc | 33 +++++++++++++++++ third_party/re2/src/re2/unicode.py | 19 ---------- 15 files changed, 133 insertions(+), 85 deletions(-) delete mode 100644 third_party/re2/src/AUTHORS delete mode 100644 third_party/re2/src/CONTRIBUTORS (limited to 'third_party/re2/src') diff --git a/third_party/re2/src/.bazelrc b/third_party/re2/src/.bazelrc index 540fb5738..c8ff98fa2 100644 --- a/third_party/re2/src/.bazelrc +++ b/third_party/re2/src/.bazelrc @@ -2,8 +2,6 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -# Enable Bzlmod. This will be the default eventually... -build --enable_bzlmod # Enable layering check features. Useful on Clang only. build --features=layering_check # Enable parse headers features. Enforcing that headers are self-contained. 
diff --git a/third_party/re2/src/.github/workflows/ci-bazel.yml b/third_party/re2/src/.github/workflows/ci-bazel.yml index 013b52ca4..d203d2d42 100644 --- a/third_party/re2/src/.github/workflows/ci-bazel.yml +++ b/third_party/re2/src/.github/workflows/ci-bazel.yml @@ -2,6 +2,8 @@ name: CI (Bazel) on: push: branches: [main] +permissions: + contents: read jobs: build: runs-on: ${{ matrix.os }} diff --git a/third_party/re2/src/.github/workflows/ci-cmake.yml b/third_party/re2/src/.github/workflows/ci-cmake.yml index d2d03afab..2287779ff 100644 --- a/third_party/re2/src/.github/workflows/ci-cmake.yml +++ b/third_party/re2/src/.github/workflows/ci-cmake.yml @@ -2,6 +2,8 @@ name: CI (CMake) on: push: branches: [main] +permissions: + contents: read jobs: build-linux: runs-on: ubuntu-latest diff --git a/third_party/re2/src/.github/workflows/ci.yml b/third_party/re2/src/.github/workflows/ci.yml index 44ac9dc29..41a892d35 100644 --- a/third_party/re2/src/.github/workflows/ci.yml +++ b/third_party/re2/src/.github/workflows/ci.yml @@ -2,6 +2,8 @@ name: CI on: push: branches: [main] +permissions: + contents: read jobs: build-appleclang: runs-on: macos-latest diff --git a/third_party/re2/src/.github/workflows/pr.yml b/third_party/re2/src/.github/workflows/pr.yml index 860da6236..e3f94fa06 100644 --- a/third_party/re2/src/.github/workflows/pr.yml +++ b/third_party/re2/src/.github/workflows/pr.yml @@ -3,8 +3,16 @@ on: pull_request_target: branches: [main] types: [opened] +permissions: + contents: read jobs: close: + permissions: + contents: read + # We have to use two different APIs below, + # so just grant two different permissions. 
+ issues: write + pull-requests: write runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/third_party/re2/src/.github/workflows/python.yml b/third_party/re2/src/.github/workflows/python.yml index 2680db24c..0767cc5a8 100644 --- a/third_party/re2/src/.github/workflows/python.yml +++ b/third_party/re2/src/.github/workflows/python.yml @@ -5,6 +5,8 @@ on: build: required: true type: number +permissions: + contents: read jobs: wheel-linux: name: Linux ${{ matrix.os }}, ${{ matrix.arch.name }}, Python ${{ matrix.ver }} diff --git a/third_party/re2/src/AUTHORS b/third_party/re2/src/AUTHORS deleted file mode 100644 index 0754006fe..000000000 --- a/third_party/re2/src/AUTHORS +++ /dev/null @@ -1,13 +0,0 @@ -# This is the official list of RE2 authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. - -# Names should be added to this file as -# Name or Organization -# The email address is not required for organizations. - -# Please keep the list sorted. - -Google Inc. -Samsung Electronics -Stefano Rivera diff --git a/third_party/re2/src/CONTRIBUTORS b/third_party/re2/src/CONTRIBUTORS deleted file mode 100644 index 1a1c84827..000000000 --- a/third_party/re2/src/CONTRIBUTORS +++ /dev/null @@ -1,41 +0,0 @@ -# This is the official list of people who can contribute -# (and typically have contributed) code to the RE2 repository. -# The AUTHORS file lists the copyright holders; this file -# lists people. For example, Google employees are listed here -# but not in AUTHORS, because Google holds the copyright. -# -# The submission process automatically checks to make sure -# that people submitting code are listed in this file (by email address). 
-# -# Names should be added to this file only after verifying that -# the individual or the individual's organization has agreed to -# the appropriate Contributor License Agreement, found here: -# -# http://code.google.com/legal/individual-cla-v1.0.html -# http://code.google.com/legal/corporate-cla-v1.0.html -# -# The agreement for individuals can be filled out on the web. -# -# When adding J Random Contributor's name to this file, -# either J's name or J's organization's name should be -# added to the AUTHORS file, depending on whether the -# individual or corporate CLA was used. - -# Names should be added to this file like so: -# Name - -# Please keep the list sorted. - -Dominic Battré -Doug Kwan -Dmitriy Vyukov -John Millikin -Mike Nazarewicz -Nico Weber -Pawel Hajdan -Rob Pike -Russ Cox -Sanjay Ghemawat -Stefano Rivera -Srinivasan Venkatachary -Viatcheslav Ostapenko diff --git a/third_party/re2/src/MODULE.bazel b/third_party/re2/src/MODULE.bazel index 87a5576a1..dd0e0bb49 100644 --- a/third_party/re2/src/MODULE.bazel +++ b/third_party/re2/src/MODULE.bazel @@ -11,10 +11,16 @@ module( ) bazel_dep(name = "platforms", version = "0.0.8") +bazel_dep(name = "apple_support", version = "1.11.1", repo_name = "build_bazel_apple_support") bazel_dep(name = "rules_cc", version = "0.0.9") -bazel_dep(name = "abseil-cpp", version = "20230802.0", repo_name = "com_google_absl") -bazel_dep(name = "rules_python", version = "0.26.0") -bazel_dep(name = "pybind11_bazel", version = "2.11.1") +bazel_dep(name = "abseil-cpp", version = "20230802.1", repo_name = "com_google_absl") +bazel_dep(name = "rules_python", version = "0.28.0") +bazel_dep(name = "pybind11_bazel", version = "2.11.1.bzl.1") + +# This is a temporary hack for `x64_x86_windows`. +# TODO(junyer): Remove whenever no longer needed. 
+cc_configure = use_extension("@bazel_tools//tools/cpp:cc_configure.bzl", "cc_configure_extension") +use_repo(cc_configure, "local_config_cc") python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension") python_configure.toolchain(python_version = "3") # ignored when non-root module diff --git a/third_party/re2/src/doc/mksyntaxgo b/third_party/re2/src/doc/mksyntaxgo index d30d28146..1a09b87cc 100755 --- a/third_party/re2/src/doc/mksyntaxgo +++ b/third_party/re2/src/doc/mksyntaxgo @@ -24,16 +24,16 @@ sam -d $out <<'!' /* Package syntax parses regular expressions into parse trees and compiles parse trees into programs. Most clients of regular expressions will use the -facilities of package regexp (such as Compile and Match) instead of this package. +facilities of package [regexp] (such as [regexp.Compile] and [regexp.Match]) instead of this package. -Syntax +# Syntax -The regular expression syntax understood by this package when parsing with the Perl flag is as follows. -Parts of the syntax can be disabled by passing alternate flags to Parse. +The regular expression syntax understood by this package when parsing with the [Perl] flag is as follows. +Parts of the syntax can be disabled by passing alternate flags to [Parse]. . $a -Unicode character classes are those in unicode.Categories and unicode.Scripts. +Unicode character classes are those in [unicode.Categories] and [unicode.Scripts]. */ package syntax . diff --git a/third_party/re2/src/python/BUILD.bazel b/third_party/re2/src/python/BUILD.bazel index a05fb6ec7..48d7d3f58 100644 --- a/third_party/re2/src/python/BUILD.bazel +++ b/third_party/re2/src/python/BUILD.bazel @@ -34,3 +34,39 @@ py_test( "@abseil-py//absl/testing:parameterized", ], ) + +# These are implementation details for `setup.py`, so they can be +# named however we want. For now, they are named to be consistent +# with the `--cpu` flag values that they will eventually replace. 
+ +platform( + name = "darwin_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:macos", + ], +) + +platform( + name = "darwin_arm64", + constraint_values = [ + "@platforms//cpu:arm64", + "@platforms//os:macos", + ], +) + +platform( + name = "x64_x86_windows", + constraint_values = [ + "@platforms//cpu:x86_32", + "@platforms//os:windows", + ], +) + +platform( + name = "x64_windows", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + ], +) diff --git a/third_party/re2/src/python/setup.py b/third_party/re2/src/python/setup.py index 3bd11edb7..df65415ee 100644 --- a/third_party/re2/src/python/setup.py +++ b/third_party/re2/src/python/setup.py @@ -53,9 +53,20 @@ class BuildExt(setuptools.command.build_ext.build_ext): cmd = ['bazel', 'build'] try: - cmd.append(f'--cpu={os.environ["BAZEL_CPU"].lower()}') + cpu = os.environ['BAZEL_CPU'] + cmd.append(f'--cpu={cpu}') + cmd.append(f'--platforms=//python:{cpu}') + if cpu == 'x64_x86_windows': + # Register the local 32-bit C++ toolchain with highest priority. + # (This is likely to break in some release of Bazel after 7.0.0, + # but this special case can hopefully be entirely removed then.) + cmd.append(f'--extra_toolchains=@local_config_cc//:cc-toolchain-{cpu}') except KeyError: pass + # Register the local Python toolchain with highest priority. + cmd.append('--extra_toolchains=@local_config_python//:py_toolchain') + # Print debug information during toolchain resolution. 
+ cmd.append('--toolchain_resolution_debug=.*') cmd += ['--compilation_mode=opt', '--', ':all'] self.spawn(cmd) diff --git a/third_party/re2/src/re2/parse.cc b/third_party/re2/src/re2/parse.cc index 655cb9a27..904599280 100644 --- a/third_party/re2/src/re2/parse.cc +++ b/third_party/re2/src/re2/parse.cc @@ -1177,7 +1177,17 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub, for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it) ccb.AddRange(it->lo, it->hi); } else if (re->op() == kRegexpLiteral) { - ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + if (re->parse_flags() & Regexp::FoldCase) { + // AddFoldedRange() can terminate prematurely if the character class + // already contains the rune. For example, if it contains 'a' and we + // want to add folded 'a', it sees 'a' and stops without adding 'A'. + // To avoid that, we use an empty character class and then merge it. + CharClassBuilder tmp; + tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + ccb.AddCharClass(&tmp); + } else { + ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + } } else { LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " " << re->ToString(); @@ -2060,6 +2070,17 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) { return false; } + // Check for look-around assertions. This is NOT because we support them! ;) + // As per https://github.com/google/re2/issues/468, we really want to report + // kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions. + // Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!". + if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) || + (t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) { + status_->set_code(kRegexpBadPerlOp); + status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3)); + return false; + } + // Check for named captures, first introduced in Python's regexp library. 
// As usual, there are three slightly different syntaxes: // diff --git a/third_party/re2/src/re2/testing/parse_test.cc b/third_party/re2/src/re2/testing/parse_test.cc index 0ee5561e9..7684b62a4 100644 --- a/third_party/re2/src/re2/testing/parse_test.cc +++ b/third_party/re2/src/re2/testing/parse_test.cc @@ -356,6 +356,13 @@ Test prefix_tests[] = { "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}" "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}" "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" }, + // As per https://github.com/google/re2/issues/467, + // these should factor identically, but they didn't + // because AddFoldedRange() terminated prematurely. + { "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" }, + { "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" }, + { "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" }, + { "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" }, }; // Test that prefix factoring works. @@ -525,4 +532,30 @@ TEST(NamedCaptures, ErrorArgs) { EXPECT_EQ(status.error_arg(), "(?"); } +// Test that look-around error args are correct. +TEST(LookAround, ErrorArgs) { + RegexpStatus status; + Regexp* re; + + re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?="); + + re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?!"); + + re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?<="); + + re = Regexp::Parse("(? '0x263A'. - - Args: - v: code point to convert - - Returns: - Unicode string - - Raises: - InputError: the argument is not a valid Unicode value. 
- """ - if v < 0 or v > _RUNE_MAX: - raise InputError("invalid Unicode value %s" % (v,)) - return "0x%04X" % (v,) - - def _ParseContinue(s): """Parses a Unicode continuation field. -- cgit v1.2.3