summary | refs | log | tree | commit | diff
path: root/third_party/re2
diff options
context:
space:
mode:
author: Cronet Mainline Eng <cronet-mainline-eng+copybara@google.com> 2024-05-28 13:59:50 +0900
committer: Motomu Utsumi <motomuman@google.com> 2024-05-28 14:11:54 +0900
commit: 168f7e285114554eb2ac9bc22343cca461355b50 (patch)
tree: c65ccc97fb3dc01e329951c1c7c7901aef7b7a2a /third_party/re2
parent: 5cfdd35118d5a23349255971e97737e32895ec0f (diff)
download: cronet-168f7e285114554eb2ac9bc22343cca461355b50.tar.gz
Import Cronet version 122.0.6261.43
FolderOrigin-RevId: /tmp/copybara-origin/src
Change-Id: Ifb7b548cde690e10cc102366bc538e744efa902b
Diffstat (limited to 'third_party/re2')
-rw-r--r-- third_party/re2/src/.bazelrc | 2
-rw-r--r-- third_party/re2/src/.github/workflows/ci-bazel.yml | 2
-rw-r--r-- third_party/re2/src/.github/workflows/ci-cmake.yml | 2
-rw-r--r-- third_party/re2/src/.github/workflows/ci.yml | 2
-rw-r--r-- third_party/re2/src/.github/workflows/pr.yml | 8
-rw-r--r-- third_party/re2/src/.github/workflows/python.yml | 2
-rw-r--r-- third_party/re2/src/AUTHORS | 13
-rw-r--r-- third_party/re2/src/CONTRIBUTORS | 41
-rw-r--r-- third_party/re2/src/MODULE.bazel | 12
-rwxr-xr-x third_party/re2/src/doc/mksyntaxgo | 10
-rw-r--r-- third_party/re2/src/python/BUILD.bazel | 36
-rw-r--r-- third_party/re2/src/python/setup.py | 13
-rw-r--r-- third_party/re2/src/re2/parse.cc | 23
-rw-r--r-- third_party/re2/src/re2/testing/parse_test.cc | 33
-rw-r--r-- third_party/re2/src/re2/unicode.py | 19
15 files changed, 133 insertions, 85 deletions
diff --git a/third_party/re2/src/.bazelrc b/third_party/re2/src/.bazelrc
index 540fb5738..c8ff98fa2 100644
--- a/third_party/re2/src/.bazelrc
+++ b/third_party/re2/src/.bazelrc
@@ -2,8 +2,6 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
-# Enable Bzlmod. This will be the default eventually...
-build --enable_bzlmod
# Enable layering check features. Useful on Clang only.
build --features=layering_check
# Enable parse headers features. Enforcing that headers are self-contained.
diff --git a/third_party/re2/src/.github/workflows/ci-bazel.yml b/third_party/re2/src/.github/workflows/ci-bazel.yml
index 013b52ca4..d203d2d42 100644
--- a/third_party/re2/src/.github/workflows/ci-bazel.yml
+++ b/third_party/re2/src/.github/workflows/ci-bazel.yml
@@ -2,6 +2,8 @@ name: CI (Bazel)
on:
push:
branches: [main]
+permissions:
+ contents: read
jobs:
build:
runs-on: ${{ matrix.os }}
diff --git a/third_party/re2/src/.github/workflows/ci-cmake.yml b/third_party/re2/src/.github/workflows/ci-cmake.yml
index d2d03afab..2287779ff 100644
--- a/third_party/re2/src/.github/workflows/ci-cmake.yml
+++ b/third_party/re2/src/.github/workflows/ci-cmake.yml
@@ -2,6 +2,8 @@ name: CI (CMake)
on:
push:
branches: [main]
+permissions:
+ contents: read
jobs:
build-linux:
runs-on: ubuntu-latest
diff --git a/third_party/re2/src/.github/workflows/ci.yml b/third_party/re2/src/.github/workflows/ci.yml
index 44ac9dc29..41a892d35 100644
--- a/third_party/re2/src/.github/workflows/ci.yml
+++ b/third_party/re2/src/.github/workflows/ci.yml
@@ -2,6 +2,8 @@ name: CI
on:
push:
branches: [main]
+permissions:
+ contents: read
jobs:
build-appleclang:
runs-on: macos-latest
diff --git a/third_party/re2/src/.github/workflows/pr.yml b/third_party/re2/src/.github/workflows/pr.yml
index 860da6236..e3f94fa06 100644
--- a/third_party/re2/src/.github/workflows/pr.yml
+++ b/third_party/re2/src/.github/workflows/pr.yml
@@ -3,8 +3,16 @@ on:
pull_request_target:
branches: [main]
types: [opened]
+permissions:
+ contents: read
jobs:
close:
+ permissions:
+ contents: read
+ # We have to use two different APIs below,
+ # so just grant two different permissions.
+ issues: write
+ pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
diff --git a/third_party/re2/src/.github/workflows/python.yml b/third_party/re2/src/.github/workflows/python.yml
index 2680db24c..0767cc5a8 100644
--- a/third_party/re2/src/.github/workflows/python.yml
+++ b/third_party/re2/src/.github/workflows/python.yml
@@ -5,6 +5,8 @@ on:
build:
required: true
type: number
+permissions:
+ contents: read
jobs:
wheel-linux:
name: Linux ${{ matrix.os }}, ${{ matrix.arch.name }}, Python ${{ matrix.ver }}
diff --git a/third_party/re2/src/AUTHORS b/third_party/re2/src/AUTHORS
deleted file mode 100644
index 0754006fe..000000000
--- a/third_party/re2/src/AUTHORS
+++ /dev/null
@@ -1,13 +0,0 @@
-# This is the official list of RE2 authors for copyright purposes.
-# This file is distinct from the CONTRIBUTORS files.
-# See the latter for an explanation.
-
-# Names should be added to this file as
-# Name or Organization <email address>
-# The email address is not required for organizations.
-
-# Please keep the list sorted.
-
-Google Inc.
-Samsung Electronics
-Stefano Rivera <stefano.rivera@gmail.com>
diff --git a/third_party/re2/src/CONTRIBUTORS b/third_party/re2/src/CONTRIBUTORS
deleted file mode 100644
index 1a1c84827..000000000
--- a/third_party/re2/src/CONTRIBUTORS
+++ /dev/null
@@ -1,41 +0,0 @@
-# This is the official list of people who can contribute
-# (and typically have contributed) code to the RE2 repository.
-# The AUTHORS file lists the copyright holders; this file
-# lists people. For example, Google employees are listed here
-# but not in AUTHORS, because Google holds the copyright.
-#
-# The submission process automatically checks to make sure
-# that people submitting code are listed in this file (by email address).
-#
-# Names should be added to this file only after verifying that
-# the individual or the individual's organization has agreed to
-# the appropriate Contributor License Agreement, found here:
-#
-# http://code.google.com/legal/individual-cla-v1.0.html
-# http://code.google.com/legal/corporate-cla-v1.0.html
-#
-# The agreement for individuals can be filled out on the web.
-#
-# When adding J Random Contributor's name to this file,
-# either J's name or J's organization's name should be
-# added to the AUTHORS file, depending on whether the
-# individual or corporate CLA was used.
-
-# Names should be added to this file like so:
-# Name <email address>
-
-# Please keep the list sorted.
-
-Dominic Battré <battre@chromium.org>
-Doug Kwan <dougkwan@google.com>
-Dmitriy Vyukov <dvyukov@google.com>
-John Millikin <jmillikin@gmail.com>
-Mike Nazarewicz <mpn@google.com>
-Nico Weber <thakis@chromium.org>
-Pawel Hajdan <phajdan.jr@gmail.com>
-Rob Pike <r@google.com>
-Russ Cox <rsc@swtch.com>
-Sanjay Ghemawat <sanjay@google.com>
-Stefano Rivera <stefano.rivera@gmail.com>
-Srinivasan Venkatachary <vsri@google.com>
-Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
diff --git a/third_party/re2/src/MODULE.bazel b/third_party/re2/src/MODULE.bazel
index 87a5576a1..dd0e0bb49 100644
--- a/third_party/re2/src/MODULE.bazel
+++ b/third_party/re2/src/MODULE.bazel
@@ -11,10 +11,16 @@ module(
)
bazel_dep(name = "platforms", version = "0.0.8")
+bazel_dep(name = "apple_support", version = "1.11.1", repo_name = "build_bazel_apple_support")
bazel_dep(name = "rules_cc", version = "0.0.9")
-bazel_dep(name = "abseil-cpp", version = "20230802.0", repo_name = "com_google_absl")
-bazel_dep(name = "rules_python", version = "0.26.0")
-bazel_dep(name = "pybind11_bazel", version = "2.11.1")
+bazel_dep(name = "abseil-cpp", version = "20230802.1", repo_name = "com_google_absl")
+bazel_dep(name = "rules_python", version = "0.28.0")
+bazel_dep(name = "pybind11_bazel", version = "2.11.1.bzl.1")
+
+# This is a temporary hack for `x64_x86_windows`.
+# TODO(junyer): Remove whenever no longer needed.
+cc_configure = use_extension("@bazel_tools//tools/cpp:cc_configure.bzl", "cc_configure_extension")
+use_repo(cc_configure, "local_config_cc")
python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension")
python_configure.toolchain(python_version = "3") # ignored when non-root module
diff --git a/third_party/re2/src/doc/mksyntaxgo b/third_party/re2/src/doc/mksyntaxgo
index d30d28146..1a09b87cc 100755
--- a/third_party/re2/src/doc/mksyntaxgo
+++ b/third_party/re2/src/doc/mksyntaxgo
@@ -24,16 +24,16 @@ sam -d $out <<'!'
/*
Package syntax parses regular expressions into parse trees and compiles
parse trees into programs. Most clients of regular expressions will use the
-facilities of package regexp (such as Compile and Match) instead of this package.
+facilities of package [regexp] (such as [regexp.Compile] and [regexp.Match]) instead of this package.
-Syntax
+# Syntax
-The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
-Parts of the syntax can be disabled by passing alternate flags to Parse.
+The regular expression syntax understood by this package when parsing with the [Perl] flag is as follows.
+Parts of the syntax can be disabled by passing alternate flags to [Parse].
.
$a
-Unicode character classes are those in unicode.Categories and unicode.Scripts.
+Unicode character classes are those in [unicode.Categories] and [unicode.Scripts].
*/
package syntax
.
diff --git a/third_party/re2/src/python/BUILD.bazel b/third_party/re2/src/python/BUILD.bazel
index a05fb6ec7..48d7d3f58 100644
--- a/third_party/re2/src/python/BUILD.bazel
+++ b/third_party/re2/src/python/BUILD.bazel
@@ -34,3 +34,39 @@ py_test(
"@abseil-py//absl/testing:parameterized",
],
)
+
+# These are implementation details for `setup.py`, so they can be
+# named however we want. For now, they are named to be consistent
+# with the `--cpu` flag values that they will eventually replace.
+
+platform(
+ name = "darwin_x86_64",
+ constraint_values = [
+ "@platforms//cpu:x86_64",
+ "@platforms//os:macos",
+ ],
+)
+
+platform(
+ name = "darwin_arm64",
+ constraint_values = [
+ "@platforms//cpu:arm64",
+ "@platforms//os:macos",
+ ],
+)
+
+platform(
+ name = "x64_x86_windows",
+ constraint_values = [
+ "@platforms//cpu:x86_32",
+ "@platforms//os:windows",
+ ],
+)
+
+platform(
+ name = "x64_windows",
+ constraint_values = [
+ "@platforms//cpu:x86_64",
+ "@platforms//os:windows",
+ ],
+)
diff --git a/third_party/re2/src/python/setup.py b/third_party/re2/src/python/setup.py
index 3bd11edb7..df65415ee 100644
--- a/third_party/re2/src/python/setup.py
+++ b/third_party/re2/src/python/setup.py
@@ -53,9 +53,20 @@ class BuildExt(setuptools.command.build_ext.build_ext):
cmd = ['bazel', 'build']
try:
- cmd.append(f'--cpu={os.environ["BAZEL_CPU"].lower()}')
+ cpu = os.environ['BAZEL_CPU']
+ cmd.append(f'--cpu={cpu}')
+ cmd.append(f'--platforms=//python:{cpu}')
+ if cpu == 'x64_x86_windows':
+ # Register the local 32-bit C++ toolchain with highest priority.
+ # (This is likely to break in some release of Bazel after 7.0.0,
+ # but this special case can hopefully be entirely removed then.)
+ cmd.append(f'--extra_toolchains=@local_config_cc//:cc-toolchain-{cpu}')
except KeyError:
pass
+ # Register the local Python toolchain with highest priority.
+ cmd.append('--extra_toolchains=@local_config_python//:py_toolchain')
+ # Print debug information during toolchain resolution.
+ cmd.append('--toolchain_resolution_debug=.*')
cmd += ['--compilation_mode=opt', '--', ':all']
self.spawn(cmd)
diff --git a/third_party/re2/src/re2/parse.cc b/third_party/re2/src/re2/parse.cc
index 655cb9a27..904599280 100644
--- a/third_party/re2/src/re2/parse.cc
+++ b/third_party/re2/src/re2/parse.cc
@@ -1177,7 +1177,17 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
ccb.AddRange(it->lo, it->hi);
} else if (re->op() == kRegexpLiteral) {
- ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+ if (re->parse_flags() & Regexp::FoldCase) {
+ // AddFoldedRange() can terminate prematurely if the character class
+ // already contains the rune. For example, if it contains 'a' and we
+ // want to add folded 'a', it sees 'a' and stops without adding 'A'.
+ // To avoid that, we use an empty character class and then merge it.
+ CharClassBuilder tmp;
+ tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+ ccb.AddCharClass(&tmp);
+ } else {
+ ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+ }
} else {
LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
<< re->ToString();
@@ -2060,6 +2070,17 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) {
return false;
}
+ // Check for look-around assertions. This is NOT because we support them! ;)
+ // As per https://github.com/google/re2/issues/468, we really want to report
+ // kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions.
+ // Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!".
+ if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) ||
+ (t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) {
+ status_->set_code(kRegexpBadPerlOp);
+ status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3));
+ return false;
+ }
+
// Check for named captures, first introduced in Python's regexp library.
// As usual, there are three slightly different syntaxes:
//
diff --git a/third_party/re2/src/re2/testing/parse_test.cc b/third_party/re2/src/re2/testing/parse_test.cc
index 0ee5561e9..7684b62a4 100644
--- a/third_party/re2/src/re2/testing/parse_test.cc
+++ b/third_party/re2/src/re2/testing/parse_test.cc
@@ -356,6 +356,13 @@ Test prefix_tests[] = {
"cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
"cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
"str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
+ // As per https://github.com/google/re2/issues/467,
+ // these should factor identically, but they didn't
+ // because AddFoldedRange() terminated prematurely.
+ { "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
+ { "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
+ { "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" },
+ { "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" },
};
// Test that prefix factoring works.
@@ -525,4 +532,30 @@ TEST(NamedCaptures, ErrorArgs) {
EXPECT_EQ(status.error_arg(), "(?<space bar>");
}
+// Test that look-around error args are correct.
+TEST(LookAround, ErrorArgs) {
+ RegexpStatus status;
+ Regexp* re;
+
+ re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?=");
+
+ re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?!");
+
+ re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?<=");
+
+ re = Regexp::Parse("(?<!foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?<!");
+}
+
} // namespace re2
diff --git a/third_party/re2/src/re2/unicode.py b/third_party/re2/src/re2/unicode.py
index 91734074a..9599304b3 100644
--- a/third_party/re2/src/re2/unicode.py
+++ b/third_party/re2/src/re2/unicode.py
@@ -75,25 +75,6 @@ def _URange(s):
raise InputError("invalid Unicode range %s" % (s,))
-def _UStr(v):
- """Converts Unicode code point to hex string.
-
- 0x263a => '0x263A'.
-
- Args:
- v: code point to convert
-
- Returns:
- Unicode string
-
- Raises:
- InputError: the argument is not a valid Unicode value.
- """
- if v < 0 or v > _RUNE_MAX:
- raise InputError("invalid Unicode value %s" % (v,))
- return "0x%04X" % (v,)
-
-
def _ParseContinue(s):
"""Parses a Unicode continuation field.