aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorandroid-build-team Robot <android-build-team-robot@google.com>2020-04-28 20:24:00 +0000
committerandroid-build-team Robot <android-build-team-robot@google.com>2020-04-28 20:24:00 +0000
commit085dd2de995a346670c05f8da48cdb3fd1fc46ad (patch)
treee73719ac6eb3178b7cde375373cbf600f2c20aea
parent4c1a5f7fe0e5760a5079d53b4b0646e9a8e7d31f (diff)
parent9bfe4731e753c5e605a03889a25564b889648762 (diff)
downloadpcre-q_tzdata_aml_297100000.tar.gz
Change-Id: I48940243785db116fc8d64b9e43a7b5556040585
-rw-r--r--Android.bp33
-rw-r--r--METADATA10
-rw-r--r--README.version2
-rw-r--r--dist2/AUTHORS6
-rw-r--r--dist2/CMakeLists.txt82
-rw-r--r--dist2/ChangeLog160
-rw-r--r--dist2/LICENCE6
-rw-r--r--dist2/Makefile.am10
-rw-r--r--dist2/Makefile.in830
-rw-r--r--dist2/NEWS48
-rw-r--r--dist2/NON-AUTOTOOLS-BUILD14
-rw-r--r--dist2/README50
-rwxr-xr-xdist2/RunGrepTest48
-rw-r--r--dist2/RunGrepTest.bat9
-rwxr-xr-xdist2/RunTest2
-rw-r--r--dist2/aclocal.m4191
-rwxr-xr-xdist2/ar-lib4
-rwxr-xr-xdist2/compile8
-rw-r--r--dist2/config-cmake.h.in3
-rwxr-xr-xdist2/config.guess559
-rwxr-xr-xdist2/config.sub249
-rwxr-xr-xdist2/configure337
-rw-r--r--dist2/configure.ac58
-rwxr-xr-xdist2/depcomp8
-rw-r--r--dist2/doc/html/NON-AUTOTOOLS-BUILD.txt14
-rw-r--r--dist2/doc/html/README.txt50
-rw-r--r--dist2/doc/html/pcre2.html4
-rw-r--r--dist2/doc/html/pcre2_compile.html7
-rw-r--r--dist2/doc/html/pcre2_dfa_match.html2
-rw-r--r--dist2/doc/html/pcre2_jit_compile.html1
-rw-r--r--dist2/doc/html/pcre2_match.html8
-rw-r--r--dist2/doc/html/pcre2_match_data_free.html5
-rw-r--r--dist2/doc/html/pcre2_pattern_info.html3
-rw-r--r--dist2/doc/html/pcre2_set_compile_extra_options.html4
-rw-r--r--dist2/doc/html/pcre2_set_substitute_callout.html43
-rw-r--r--dist2/doc/html/pcre2_substring_nametable_scan.html4
-rw-r--r--dist2/doc/html/pcre2api.html471
-rw-r--r--dist2/doc/html/pcre2build.html52
-rw-r--r--dist2/doc/html/pcre2callout.html18
-rw-r--r--dist2/doc/html/pcre2compat.html55
-rw-r--r--dist2/doc/html/pcre2grep.html37
-rw-r--r--dist2/doc/html/pcre2jit.html85
-rw-r--r--dist2/doc/html/pcre2limits.html22
-rw-r--r--dist2/doc/html/pcre2matching.html24
-rw-r--r--dist2/doc/html/pcre2pattern.html1057
-rw-r--r--dist2/doc/html/pcre2perform.html10
-rw-r--r--dist2/doc/html/pcre2posix.html152
-rw-r--r--dist2/doc/html/pcre2syntax.html147
-rw-r--r--dist2/doc/html/pcre2test.html110
-rw-r--r--dist2/doc/html/pcre2unicode.html210
-rw-r--r--dist2/doc/pcre2.36
-rw-r--r--dist2/doc/pcre2.txt5600
-rw-r--r--dist2/doc/pcre2_compile.310
-rw-r--r--dist2/doc/pcre2_dfa_match.34
-rw-r--r--dist2/doc/pcre2_jit_compile.33
-rw-r--r--dist2/doc/pcre2_match.310
-rw-r--r--dist2/doc/pcre2_match_data_free.36
-rw-r--r--dist2/doc/pcre2_pattern_info.35
-rw-r--r--dist2/doc/pcre2_set_compile_extra_options.38
-rw-r--r--dist2/doc/pcre2_set_substitute_callout.331
-rw-r--r--dist2/doc/pcre2_substring_nametable_scan.36
-rw-r--r--dist2/doc/pcre2api.3475
-rw-r--r--dist2/doc/pcre2build.335
-rw-r--r--dist2/doc/pcre2callout.321
-rw-r--r--dist2/doc/pcre2compat.357
-rw-r--r--dist2/doc/pcre2grep.138
-rw-r--r--dist2/doc/pcre2grep.txt62
-rw-r--r--dist2/doc/pcre2jit.346
-rw-r--r--dist2/doc/pcre2limits.324
-rw-r--r--dist2/doc/pcre2matching.325
-rw-r--r--dist2/doc/pcre2pattern.31026
-rw-r--r--dist2/doc/pcre2perform.312
-rw-r--r--dist2/doc/pcre2posix.3125
-rw-r--r--dist2/doc/pcre2syntax.3113
-rw-r--r--dist2/doc/pcre2test.1113
-rw-r--r--dist2/doc/pcre2test.txt528
-rw-r--r--dist2/doc/pcre2unicode.3199
-rwxr-xr-xdist2/install-sh45
-rw-r--r--dist2/ltmain.sh24
-rw-r--r--dist2/m4/libtool.m434
-rw-r--r--dist2/m4/ltoptions.m42
-rw-r--r--dist2/m4/ltsugar.m42
-rw-r--r--dist2/m4/ltversion.m412
-rw-r--r--dist2/m4/lt~obsolete.m42
-rwxr-xr-xdist2/missing14
-rwxr-xr-xdist2/perltest.sh29
-rw-r--r--dist2/src/config.h.generic15
-rw-r--r--dist2/src/config.h.in9
-rw-r--r--dist2/src/dftables.c4
-rw-r--r--dist2/src/pcre2.h.generic108
-rw-r--r--dist2/src/pcre2.h.in104
-rw-r--r--dist2/src/pcre2_auto_possess.c14
-rw-r--r--dist2/src/pcre2_chartables.c64
-rw-r--r--dist2/src/pcre2_chartables.c.dist18
-rw-r--r--dist2/src/pcre2_compile.c678
-rw-r--r--dist2/src/pcre2_context.c22
-rw-r--r--dist2/src/pcre2_convert.c2
-rw-r--r--dist2/src/pcre2_dfa_match.c71
-rw-r--r--dist2/src/pcre2_error.c19
-rw-r--r--dist2/src/pcre2_extuni.c4
-rw-r--r--dist2/src/pcre2_internal.h257
-rw-r--r--dist2/src/pcre2_intmodedep.h5
-rw-r--r--dist2/src/pcre2_jit_compile.c2230
-rw-r--r--dist2/src/pcre2_jit_match.c6
-rw-r--r--dist2/src/pcre2_jit_test.c751
-rw-r--r--dist2/src/pcre2_maketables.c26
-rw-r--r--dist2/src/pcre2_match.c88
-rw-r--r--dist2/src/pcre2_match_data.c8
-rw-r--r--dist2/src/pcre2_printint.c10
-rw-r--r--dist2/src/pcre2_script_run.c441
-rw-r--r--dist2/src/pcre2_study.c16
-rw-r--r--dist2/src/pcre2_substitute.c67
-rw-r--r--dist2/src/pcre2_tables.c71
-rw-r--r--dist2/src/pcre2_ucd.c7213
-rw-r--r--dist2/src/pcre2_ucp.h1
-rw-r--r--dist2/src/pcre2_xclass.c6
-rw-r--r--dist2/src/pcre2grep.c71
-rw-r--r--dist2/src/pcre2posix.c65
-rw-r--r--dist2/src/pcre2posix.h33
-rw-r--r--dist2/src/pcre2test.c462
-rw-r--r--dist2/src/sljit/sljitConfigInternal.h2
-rw-r--r--dist2/src/sljit/sljitExecAllocator.c46
-rw-r--r--dist2/src/sljit/sljitLir.c11
-rw-r--r--dist2/src/sljit/sljitNativeARM_64.c20
-rw-r--r--dist2/src/sljit/sljitNativeMIPS_32.c11
-rw-r--r--dist2/src/sljit/sljitNativeMIPS_64.c13
-rw-r--r--dist2/src/sljit/sljitNativeMIPS_common.c164
-rw-r--r--dist2/src/sljit/sljitNativePPC_common.c2
-rwxr-xr-xdist2/test-driver8
-rw-r--r--dist2/testdata/grepoutputCN30
-rw-r--r--dist2/testdata/testinput180
-rw-r--r--dist2/testdata/testinput1017
-rw-r--r--dist2/testdata/testinput1220
-rw-r--r--dist2/testdata/testinput174
-rw-r--r--dist2/testdata/testinput2102
-rw-r--r--dist2/testdata/testinput4162
-rw-r--r--dist2/testdata/testinput571
-rw-r--r--dist2/testdata/testinput617
-rw-r--r--dist2/testdata/testoutput1120
-rw-r--r--dist2/testdata/testoutput10181
-rw-r--r--dist2/testdata/testoutput11-1626
-rw-r--r--dist2/testdata/testoutput11-3234
-rw-r--r--dist2/testdata/testoutput12-16185
-rw-r--r--dist2/testdata/testoutput12-32190
-rw-r--r--dist2/testdata/testoutput1534
-rw-r--r--dist2/testdata/testoutput164
-rw-r--r--dist2/testdata/testoutput1740
-rw-r--r--dist2/testdata/testoutput21533
-rw-r--r--dist2/testdata/testoutput208
-rw-r--r--dist2/testdata/testoutput212
-rw-r--r--dist2/testdata/testoutput22-162
-rw-r--r--dist2/testdata/testoutput22-322
-rw-r--r--dist2/testdata/testoutput22-82
-rw-r--r--dist2/testdata/testoutput36
-rw-r--r--dist2/testdata/testoutput3A6
-rw-r--r--dist2/testdata/testoutput3B6
-rw-r--r--dist2/testdata/testoutput4270
-rw-r--r--dist2/testdata/testoutput5265
-rw-r--r--dist2/testdata/testoutput655
-rw-r--r--dist2/testdata/testoutput78
-rw-r--r--dist2/testdata/testoutput8-16-216
-rw-r--r--dist2/testdata/testoutput8-16-316
-rw-r--r--dist2/testdata/testoutput8-16-416
-rw-r--r--dist2/testdata/testoutput8-32-216
-rw-r--r--dist2/testdata/testoutput8-32-316
-rw-r--r--dist2/testdata/testoutput8-32-416
-rw-r--r--dist2/testdata/testoutput8-8-216
-rw-r--r--dist2/testdata/testoutput8-8-316
-rw-r--r--dist2/testdata/testoutput8-8-416
-rw-r--r--dist2/testdata/testoutput912
-rw-r--r--include_internal/config.h15
-rw-r--r--pcrecpp/include/pcre_scanner.h173
-rw-r--r--pcrecpp/include/pcre_stringpiece.h180
-rw-r--r--pcrecpp/include/pcrecpp.h697
-rw-r--r--pcrecpp/include/pcrecpparg.h208
-rw-r--r--pcrecpp/pcre_scanner.cc186
-rw-r--r--pcrecpp/pcre_scanner_unittest.cc161
-rw-r--r--pcrecpp/pcre_stringpiece.cc39
-rw-r--r--pcrecpp/pcre_stringpiece_unittest.cc152
-rw-r--r--pcrecpp/pcrecpp.cc727
-rw-r--r--pcrecpp/pcrecpp_internal.h71
-rw-r--r--pcrecpp/pcrecpp_unittest.cc1282
182 files changed, 15580 insertions, 19259 deletions
diff --git a/Android.bp b/Android.bp
index 15cc1a83..45d90656 100644
--- a/Android.bp
+++ b/Android.bp
@@ -19,7 +19,6 @@ libpcre2_src_files = [
libpcre2_dist_prefix + "/src/pcre2_newline.c",
libpcre2_dist_prefix + "/src/pcre2_ord2utf.c",
libpcre2_dist_prefix + "/src/pcre2_pattern_info.c",
- libpcre2_dist_prefix + "/src/pcre2_script_run.c",
libpcre2_dist_prefix + "/src/pcre2_serialize.c",
libpcre2_dist_prefix + "/src/pcre2_string_utils.c",
libpcre2_dist_prefix + "/src/pcre2_study.c",
@@ -65,11 +64,39 @@ cc_library {
double_loadable: true,
recovery_available: true,
srcs: libpcre2_src_files,
- stl: "none",
- system_shared_libs: ["libc"],
target: {
linux_bionic: {
enabled: true,
},
},
}
+
+
+//
+// Google's C++ wrapper.
+//
+
+cc_library_shared {
+ name: "libpcrecpp",
+ cflags: [
+ "-Wall",
+ "-Werror",
+ "-Wno-unused-parameter",
+ "-Wno-unused-variable",
+ ],
+ tidy_checks: [
+ "-google-build-using-namespace",
+ "-google-global-names-in-headers",
+ ],
+ local_include_dirs: ["pcrecpp/include"],
+ shared_libs: ["libpcre2"],
+ export_include_dirs: [
+ "pcrecpp/include",
+ "include",
+ ],
+ srcs: [
+ "pcrecpp/pcrecpp.cc",
+ "pcrecpp/pcre_scanner.cc",
+ "pcrecpp/pcre_stringpiece.cc",
+ ],
+}
diff --git a/METADATA b/METADATA
index 334588fe..93bc4243 100644
--- a/METADATA
+++ b/METADATA
@@ -7,12 +7,12 @@ third_party {
}
url {
type: ARCHIVE
- value: "ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-10.33.zip"
+ value: "ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-10.32.zip"
}
- version: "10.33"
+ version: "10.32"
last_upgrade_date {
- year: 2019
- month: 8
- day: 7
+ year: 2018
+ month: 9
+ day: 11
}
}
diff --git a/README.version b/README.version
new file mode 100644
index 00000000..93111802
--- /dev/null
+++ b/README.version
@@ -0,0 +1,2 @@
+URL: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-10.31.tar.gz
+Version: 10.31
diff --git a/dist2/AUTHORS b/dist2/AUTHORS
index 8d4e15a2..d5592bbc 100644
--- a/dist2/AUTHORS
+++ b/dist2/AUTHORS
@@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2018 University of Cambridge
All rights reserved
@@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2018 Zoltan Herczeg
All rights reserved.
@@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2018 Zoltan Herczeg
All rights reserved.
####
diff --git a/dist2/CMakeLists.txt b/dist2/CMakeLists.txt
index 4737687b..1a2c95ba 100644
--- a/dist2/CMakeLists.txt
+++ b/dist2/CMakeLists.txt
@@ -80,20 +80,16 @@
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
# 2017-04-08 PH added HEAP_LIMIT
# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support
-# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed)
+# 2018-06-19 PH added checks for stdint.h and inttypes.h
# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
-# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
-# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
-# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
PROJECT(PCRE2 C)
-# Increased minimum to 2.8.0 to support newer add_test features.
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
+# Increased minimum to 2.8.0 to support newer add_test features. Set policy
+# CMP0026 to avoid warnings for the use of LOCATION in GET_TARGET_PROPERTY.
-# Set policy CMP0026 to avoid warnings for the use of LOCATION in
-# GET_TARGET_PROPERTY. This should no longer be required.
-# CMAKE_POLICY(SET CMP0026 OLD)
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
+CMAKE_POLICY(SET CMP0026 OLD)
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
@@ -119,6 +115,18 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
+IF(HAVE_INTTYPES_H)
+ SET(PCRE2_HAVE_INTTYPES_H 1)
+ELSE(HAVE_INTTYPES_H)
+ SET(PCRE2_HAVE_INTTYPES_H 0)
+ENDIF(HAVE_INTTYPES_H)
+
+IF(HAVE_STDINT_H)
+ SET(PCRE2_HAVE_STDINT_H 1)
+ELSE(HAVE_STDINT_H)
+ SET(PCRE2_HAVE_STDINT_H 0)
+ENDIF(HAVE_STDINT_H)
+
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
@@ -139,8 +147,6 @@ OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
OPTION(PCRE2_DEBUG "Include debugging code" OFF)
-OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
-
SET(PCRE2_EBCDIC OFF CACHE BOOL
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
@@ -180,15 +186,12 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
"Enable SELinux compatible execmem allocator in JIT.")
-SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
+SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
"Enable use of Just-in-time compiling in pcre2grep.")
-SET(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL
+SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL
"Enable callout string support in pcre2grep.")
-SET(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL
- "Enable callout string fork support in pcre2grep.")
-
SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
@@ -307,25 +310,18 @@ IF(PCRE2_SUPPORT_JIT_SEALLOC)
SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)
-IF(PCRE2GREP_SUPPORT_JIT)
+IF(PCRE2_SUPPORT_PCRE2GREP_JIT)
SET(SUPPORT_PCRE2GREP_JIT 1)
-ENDIF(PCRE2GREP_SUPPORT_JIT)
+ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT)
-IF(PCRE2GREP_SUPPORT_CALLOUT)
+IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT)
SET(SUPPORT_PCRE2GREP_CALLOUT 1)
- IF(PCRE2GREP_SUPPORT_CALLOUT_FORK)
- SET(SUPPORT_PCRE2GREP_CALLOUT_FORK 1)
- ENDIF(PCRE2GREP_SUPPORT_CALLOUT_FORK)
-ENDIF(PCRE2GREP_SUPPORT_CALLOUT)
+ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT)
IF(PCRE2_SUPPORT_VALGRIND)
SET(SUPPORT_VALGRIND 1)
ENDIF(PCRE2_SUPPORT_VALGRIND)
-IF(PCRE2_DISABLE_PERCENT_ZT)
- SET(DISABLE_PERCENT_ZT 1)
-ENDIF(PCRE2_DISABLE_PERCENT_ZT)
-
# This next one used to reference ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with
# some modifications to cmake/FindReadline.cmake which should
@@ -459,7 +455,6 @@ SET(PCRE2_SOURCES
src/pcre2_newline.c
src/pcre2_ord2utf.c
src/pcre2_pattern_info.c
- src/pcre2_script_run.c
src/pcre2_serialize.c
src/pcre2_string_utils.c
src/pcre2_study.c
@@ -649,27 +644,14 @@ IF(PCRE2_BUILD_TESTS)
TARGET_LINK_LIBRARIES(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
ENDIF(PCRE2_SUPPORT_JIT)
- # exes in Debug location tested by the RunTest and RunGrepTest shell scripts
+ # exes in Debug location tested by the RunTest shell script
# via "make test"
- # The commented out code below provokes a warning about future removal
- # of the facility, and requires policy CMP0026 to be set to "OLD". I have
- # got fed-up with the warnings, but my plea for help on the mailing list
- # produced no response. So, I've hacked. The new code below seems to work on
- # Linux.
-
-# IF(PCRE2_BUILD_PCRE2GREP)
-# GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
-# ENDIF(PCRE2_BUILD_PCRE2GREP)
-#
-# GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)
-
IF(PCRE2_BUILD_PCRE2GREP)
- SET(PCRE2GREP_EXE $<TARGET_FILE:pcre2grep>)
+ GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
ENDIF(PCRE2_BUILD_PCRE2GREP)
- SET(PCRE2TEST_EXE $<TARGET_FILE:pcre2test>)
-
+ GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)
# =================================================
# Write out a CTest configuration file
@@ -828,11 +810,10 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
- MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}")
- MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2GREP_SUPPORT_CALLOUT}")
- MESSAGE(STATUS " Enable callout fork in pcre2grep. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}")
+ MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}")
+ MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}")
MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
- MESSAGE(STATUS " Build tests (implies pcre2test .. : ${PCRE2_BUILD_TESTS}")
+ MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}")
MESSAGE(STATUS " and pcre2grep)")
IF(ZLIB_FOUND)
MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}")
@@ -855,11 +836,6 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" )
ENDIF(READLINE_FOUND)
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
- IF(PCRE2_DISABLE_PERCENT_ZT)
- MESSAGE(STATUS " Use %zu and %td ..................: OFF" )
- ELSE(PCRE2_DISABLE_PERCENT_ZT)
- MESSAGE(STATUS " Use %zu and %td ..................: AUTO" )
- ENDIF(PCRE2_DISABLE_PERCENT_ZT)
IF(MINGW AND NOT PCRE2_STATIC)
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
diff --git a/dist2/ChangeLog b/dist2/ChangeLog
index 66c6d0b3..06b69f8d 100644
--- a/dist2/ChangeLog
+++ b/dist2/ChangeLog
@@ -2,164 +2,8 @@ Change Log for PCRE2
--------------------
-Version 10.33 16-April-2019
----------------------------
-
-1. Added "allvector" to pcre2test to make it easy to check the part of the
-ovector that shouldn't be changed, in particular after substitute and failed or
-partial matches.
-
-2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has
-a greater than 1 fixed quantifier. This issue was found by Yunho Kim.
-
-3. Added support for callouts from pcre2_substitute(). After 10.33-RC1, but
-prior to release, fixed a bug that caused a crash if pcre2_substitute() was
-called with a NULL match context.
-
-4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
-functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
-names are defined as macros. This should help avoid linking with the wrong
-library in some environments while still exporting the POSIX names for
-pre-existing programs that use them. (The Debian alternative names are also
-defined as macros, but not documented.)
-
-5. Fix an xclass matching issue in JIT.
-
-6. Implement PCRE2_EXTRA_ESCAPED_CR_IS_LF (see Bugzilla 2315).
-
-7. Implement the Perl 5.28 experimental alphabetic names for atomic groups and
-lookaround assertions, for example, (*pla:...) and (*atomic:...). These are
-characterized by a lower case letter following (* and to simplify coding for
-this, the character tables created by pcre2_maketables() were updated to add a
-new "is lower case letter" bit. At the same time, the now unused "is
-hexadecimal digit" bit was removed. The default tables in
-src/pcre2_chartables.c.dist are updated.
-
-8. Implement the new Perl "script run" features (*script_run:...) and
-(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
-
-9. Fixed two typos in change 22 for 10.21, which added special handling for
-ranges such as a-z in EBCDIC environments. The original code probably never
-worked, though there were no bug reports.
-
-10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via
-pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast
-path. Also, when a match fails, set the subject field in the match data to NULL
-for tidiness - none of the substring extractors should reference this after
-match failure.
-
-11. If a pattern started with a subroutine call that had a quantifier with a
-minimum of zero, an incorrect "match must start with this character" could be
-recorded. Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to
-be the first character of a match.
-
-12. The heap limit checking code in pcre2_dfa_match() could suffer from
-overflow if the heap limit was set very large. This could cause incorrect "heap
-limit exceeded" errors.
-
-13. Add "kibibytes" to the heap limit output from pcre2test -C to make the
-units clear.
-
-14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness.
-
-15. Updated the VMS-specific code in pcre2test on the advice of a VMS user.
-
-16. Removed the unnecessary inclusion of stdint.h (or inttypes.h) from
-pcre2_internal.h as it is now included by pcre2.h. Also, change 17 for 10.32
-below was unnecessarily complicated, as inttypes.h is a Standard C header,
-which is defined to be a superset of stdint.h. Instead of conditionally
-including stdint.h or inttypes.h, pcre2.h now unconditionally includes
-inttypes.h. This supports environments that do not have stdint.h but do have
-inttypes.h, which are known to exist. A note in the autotools documentation
-says (November 2018) that there are none known that are the other way round.
-
-17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to
-forcibly disable the use of %zu and %td in formatting strings because there is
-at least one version of VMS that claims to be C99 but does not support these
-modifiers.
-
-18. Added --disable-pcre2grep-callout-fork, which restricts the callout support
-in pcre2grep to the inbuilt echo facility. This may be useful in environments
-that do not support fork().
-
-19. Fix two instances of <= 0 being applied to unsigned integers (the VMS
-compiler complains).
-
-20. Added "fork" support for VMS to pcre2grep, for running an external program
-via a string callout.
-
-21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel.
-
-22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN)
-followed by ^ it was not recognized as anchored.
-
-23. The RunGrepTest script used to cut out the test of NUL characters for
-Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD
-systems can't either. I've inverted the test so that only those OS that are
-known to work (currently only Linux) try to run this test.
-
-24. Some tests in RunGrepTest appended to testtrygrep from two different file
-descriptors instead of redirecting stderr to stdout. This worked on Linux, but
-it was reported not to on other systems, causing the tests to fail.
-
-25. In the RunTest script, make the test for stack setting use the same value
-for the stack as it needs for -bigstack.
-
-26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning.
-
-26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s
-which are valid in character classes, but not as the end of ranges, were being
-treated as literals. An example is [_-\s] (but not [\s-_] because that gave an
-error at the *start* of a range). Now an "invalid range" error is given
-independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
-
-27. Related to 26 above, PCRE2_BAD_ESCAPE_IS_LITERAL was affecting known escape
-sequences such as \eX when they appeared invalidly in a character class. Now
-the option applies only to unrecognized or malformed escape sequences.
-
-28. Fix word boundary in JIT compiler. Patch by Mike Munday.
-
-29. The pcre2_dfa_match() function was incorrectly handling conditional version
-tests such as (?(VERSION>=0)...) when the version test was true. Incorrect
-processing or a crash could result.
-
-30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group
-names, as Perl does. There was a small bug in this new code, found by
-ClusterFuzz 12950, fixed before release.
-
-31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
-construct.
-
-32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
-from auto-anchoring if \p{Any}* starts a pattern.
-
-33. Compile invalid UTF check in JIT test when only pcre32 is enabled.
-
-34. For some time now, CMake has been warning about the setting of policy
-CMP0026 to "OLD" in CmakeLists.txt, and hinting that the feature might be
-removed in a future version. A request for CMake expertise on the list produced
-no result, so I have now hacked CMakeLists.txt along the lines of some changes
-I found on the Internet. The new code no longer needs the policy setting, and
-it appears to work fine on Linux.
-
-35. Setting --enable-jit=auto for an out-of-tree build failed because the
-source directory wasn't in the search path for AC_TRY_COMPILE always. Patch
-from Ross Burton.
-
-36. Disable SSE2 JIT optimizations in x86 CPUs when SSE2 is not available.
-Patch by Guillem Jover.
-
-37. Changed expressions such as 1<<10 to 1u<<10 in many places because compiler
-warnings were reported.
-
-38. Using the clang compiler with sanitizing options causes runtime complaints
-about truncation for statments such as x = ~x when x is an 8-bit value; it
-seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x
-gets rid of the warnings. There were also two missing casts in pcre2test.
-
-
-Version 10.32 10-September-2018
--------------------------------
+Version 10.32-RC1 10-September-2018
+-----------------------------------
1. When matching using the the REG_STARTEND feature of the POSIX API with a
non-zero starting offset, unset capturing groups with lower numbers than a
diff --git a/dist2/LICENCE b/dist2/LICENCE
index 142b3b3f..b0f8804f 100644
--- a/dist2/LICENCE
+++ b/dist2/LICENCE
@@ -26,7 +26,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2018 University of Cambridge
All rights reserved.
@@ -37,7 +37,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2018 Zoltan Herczeg
All rights reserved.
@@ -48,7 +48,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2018 Zoltan Herczeg
All rights reserved.
diff --git a/dist2/Makefile.am b/dist2/Makefile.am
index b5e3635e..a4bcdf6e 100644
--- a/dist2/Makefile.am
+++ b/dist2/Makefile.am
@@ -85,7 +85,6 @@ dist_html_DATA = \
doc/html/pcre2_set_parens_nest_limit.html \
doc/html/pcre2_set_recursion_limit.html \
doc/html/pcre2_set_recursion_memory_management.html \
- doc/html/pcre2_set_substitute_callout.html \
doc/html/pcre2_substitute.html \
doc/html/pcre2_substring_copy_byname.html \
doc/html/pcre2_substring_copy_bynumber.html \
@@ -179,7 +178,6 @@ dist_man_MANS = \
doc/pcre2_set_parens_nest_limit.3 \
doc/pcre2_set_recursion_limit.3 \
doc/pcre2_set_recursion_memory_management.3 \
- doc/pcre2_set_substitute_callout.3 \
doc/pcre2_substitute.3 \
doc/pcre2_substring_copy_byname.3 \
doc/pcre2_substring_copy_bynumber.3 \
@@ -233,7 +231,7 @@ noinst_PROGRAMS =
# and 'make maintainer-clean'.
CLEANFILES =
-DISTCLEANFILES = src/config.h.in~
+DISTCLEANFILES = src/config.h.in~ config.h pcre2.h.generic
MAINTAINERCLEANFILES =
# Additional files to bundle with the distribution, over and above what
@@ -364,7 +362,6 @@ COMMON_SOURCES = \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
- src/pcre2_script_run.c \
src/pcre2_serialize.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
@@ -531,10 +528,6 @@ noinst_PROGRAMS += pcre2fuzzcheck
pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c
pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
pcre2fuzzcheck_LDADD = libpcre2-8.la
-if WITH_GCOV
-pcre2fuzzcheck_CFLAGS += $(GCOV_CFLAGS)
-pcre2fuzzcheck_LDADD += $(GCOV_LIBS)
-endif # WITH_GCOV
endif # WITH FUZZ_SUPPORT
endif # WITH_PCRE2_8
@@ -625,7 +618,6 @@ EXTRA_DIST += \
testdata/grepoutput \
testdata/grepoutput8 \
testdata/grepoutputC \
- testdata/grepoutputCN \
testdata/grepoutputN \
testdata/greppatN4 \
testdata/testinput1 \
diff --git a/dist2/Makefile.in b/dist2/Makefile.in
index 2944b0e3..597b1715 100644
--- a/dist2/Makefile.in
+++ b/dist2/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -93,7 +93,7 @@ PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-TESTS = $(am__EXEEXT_4) RunTest $(am__append_32)
+TESTS = $(am__EXEEXT_4) RunTest $(am__append_30)
bin_PROGRAMS = $(am__EXEEXT_1) pcre2test$(EXEEXT)
noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4)
@WITH_REBUILD_CHARTABLES_TRUE@am__append_1 = dftables
@@ -112,28 +112,26 @@ noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4)
@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_14 = $(GCOV_CFLAGS)
@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_15 = $(GCOV_LIBS)
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__append_16 = pcre2fuzzcheck
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_17 = $(GCOV_CFLAGS)
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_18 = $(GCOV_LIBS)
-@WITH_JIT_TRUE@am__append_19 = pcre2_jit_test
-@WITH_JIT_TRUE@am__append_20 = pcre2_jit_test
-@WITH_JIT_TRUE@@WITH_PCRE2_8_TRUE@am__append_21 = libpcre2-8.la
-@WITH_JIT_TRUE@@WITH_PCRE2_16_TRUE@am__append_22 = libpcre2-16.la
-@WITH_JIT_TRUE@@WITH_PCRE2_32_TRUE@am__append_23 = libpcre2-32.la
-@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_24 = $(GCOV_CFLAGS)
-@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_25 = $(GCOV_LIBS)
-@WITH_PCRE2_8_TRUE@am__append_26 = libpcre2-8.la libpcre2-posix.la
-@WITH_PCRE2_16_TRUE@am__append_27 = libpcre2-16.la
-@WITH_PCRE2_32_TRUE@am__append_28 = libpcre2-32.la
-@WITH_VALGRIND_TRUE@am__append_29 = $(VALGRIND_CFLAGS)
-@WITH_GCOV_TRUE@am__append_30 = $(GCOV_CFLAGS)
-@WITH_GCOV_TRUE@am__append_31 = $(GCOV_LIBS)
+@WITH_JIT_TRUE@am__append_17 = pcre2_jit_test
+@WITH_JIT_TRUE@am__append_18 = pcre2_jit_test
+@WITH_JIT_TRUE@@WITH_PCRE2_8_TRUE@am__append_19 = libpcre2-8.la
+@WITH_JIT_TRUE@@WITH_PCRE2_16_TRUE@am__append_20 = libpcre2-16.la
+@WITH_JIT_TRUE@@WITH_PCRE2_32_TRUE@am__append_21 = libpcre2-32.la
+@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_22 = $(GCOV_CFLAGS)
+@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_23 = $(GCOV_LIBS)
+@WITH_PCRE2_8_TRUE@am__append_24 = libpcre2-8.la libpcre2-posix.la
+@WITH_PCRE2_16_TRUE@am__append_25 = libpcre2-16.la
+@WITH_PCRE2_32_TRUE@am__append_26 = libpcre2-32.la
+@WITH_VALGRIND_TRUE@am__append_27 = $(VALGRIND_CFLAGS)
+@WITH_GCOV_TRUE@am__append_28 = $(GCOV_CFLAGS)
+@WITH_GCOV_TRUE@am__append_29 = $(GCOV_LIBS)
+@WITH_PCRE2_8_TRUE@am__append_30 = RunGrepTest
+@WITH_PCRE2_8_TRUE@am__append_31 = RunGrepTest.bat
@WITH_PCRE2_8_TRUE@am__append_32 = RunGrepTest
-@WITH_PCRE2_8_TRUE@am__append_33 = RunGrepTest.bat
-@WITH_PCRE2_8_TRUE@am__append_34 = RunGrepTest
-@WITH_PCRE2_8_TRUE@am__append_35 = libpcre2-8.pc libpcre2-posix.pc
-@WITH_PCRE2_16_TRUE@am__append_36 = libpcre2-16.pc
-@WITH_PCRE2_32_TRUE@am__append_37 = libpcre2-32.pc
-@WITH_GCOV_FALSE@am__append_38 = src/*.gcda src/*.gcno
+@WITH_PCRE2_8_TRUE@am__append_33 = libpcre2-8.pc libpcre2-posix.pc
+@WITH_PCRE2_16_TRUE@am__append_34 = libpcre2-16.pc
+@WITH_PCRE2_32_TRUE@am__append_35 = libpcre2-32.pc
+@WITH_GCOV_FALSE@am__append_36 = src/*.gcda src/*.gcno
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \
@@ -155,17 +153,20 @@ CONFIG_HEADER = $(top_builddir)/src/config.h
CONFIG_CLEAN_FILES = libpcre2-8.pc libpcre2-16.pc libpcre2-32.pc \
libpcre2-posix.pc pcre2-config src/pcre2.h
CONFIG_CLEAN_VPATH_FILES =
-@WITH_PCRE2_8_TRUE@am__EXEEXT_1 = pcre2grep$(EXEEXT)
-am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \
- "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" \
- "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" \
- "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" \
- "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"
-@WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = dftables$(EXEEXT)
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__EXEEXT_3 = pcre2fuzzcheck$(EXEEXT)
-@WITH_JIT_TRUE@am__EXEEXT_4 = pcre2_jit_test$(EXEEXT)
-PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
LIBRARIES = $(noinst_LIBRARIES)
+ARFLAGS = cru
+AM_V_AR = $(am__v_AR_@AM_V@)
+am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
+am__v_AR_0 = @echo " AR " $@;
+am__v_AR_1 =
+_libs_libpcre2_fuzzsupport_a_AR = $(AR) $(ARFLAGS)
+_libs_libpcre2_fuzzsupport_a_DEPENDENCIES =
+am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST = \
+ src/pcre2_fuzzsupport.c
+am__dirstamp = $(am__leading_dot)dirstamp
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__libs_libpcre2_fuzzsupport_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT)
+_libs_libpcre2_fuzzsupport_a_OBJECTS = \
+ $(am__libs_libpcre2_fuzzsupport_a_OBJECTS)
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@@ -193,20 +194,12 @@ am__uninstall_files_from_dir = { \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \
+ "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" \
+ "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" \
+ "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" \
+ "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
-ARFLAGS = cru
-AM_V_AR = $(am__v_AR_@AM_V@)
-am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
-am__v_AR_0 = @echo " AR " $@;
-am__v_AR_1 =
-_libs_libpcre2_fuzzsupport_a_AR = $(AR) $(ARFLAGS)
-_libs_libpcre2_fuzzsupport_a_DEPENDENCIES =
-am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST = \
- src/pcre2_fuzzsupport.c
-am__dirstamp = $(am__leading_dot)dirstamp
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__libs_libpcre2_fuzzsupport_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT)
-_libs_libpcre2_fuzzsupport_a_OBJECTS = \
- $(am__libs_libpcre2_fuzzsupport_a_OBJECTS)
libpcre2_16_la_DEPENDENCIES =
am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \
src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \
@@ -216,9 +209,8 @@ am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \
src/pcre2_jit_compile.c src/pcre2_maketables.c \
src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \
src/pcre2_ord2utf.c src/pcre2_pattern_info.c \
- src/pcre2_script_run.c src/pcre2_serialize.c \
- src/pcre2_string_utils.c src/pcre2_study.c \
- src/pcre2_substitute.c src/pcre2_substring.c \
+ src/pcre2_serialize.c src/pcre2_string_utils.c \
+ src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \
src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \
src/pcre2_valid_utf.c src/pcre2_xclass.c
am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \
@@ -237,7 +229,6 @@ am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \
src/libpcre2_16_la-pcre2_newline.lo \
src/libpcre2_16_la-pcre2_ord2utf.lo \
src/libpcre2_16_la-pcre2_pattern_info.lo \
- src/libpcre2_16_la-pcre2_script_run.lo \
src/libpcre2_16_la-pcre2_serialize.lo \
src/libpcre2_16_la-pcre2_string_utils.lo \
src/libpcre2_16_la-pcre2_study.lo \
@@ -270,9 +261,8 @@ am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \
src/pcre2_jit_compile.c src/pcre2_maketables.c \
src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \
src/pcre2_ord2utf.c src/pcre2_pattern_info.c \
- src/pcre2_script_run.c src/pcre2_serialize.c \
- src/pcre2_string_utils.c src/pcre2_study.c \
- src/pcre2_substitute.c src/pcre2_substring.c \
+ src/pcre2_serialize.c src/pcre2_string_utils.c \
+ src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \
src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \
src/pcre2_valid_utf.c src/pcre2_xclass.c
am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \
@@ -291,7 +281,6 @@ am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \
src/libpcre2_32_la-pcre2_newline.lo \
src/libpcre2_32_la-pcre2_ord2utf.lo \
src/libpcre2_32_la-pcre2_pattern_info.lo \
- src/libpcre2_32_la-pcre2_script_run.lo \
src/libpcre2_32_la-pcre2_serialize.lo \
src/libpcre2_32_la-pcre2_string_utils.lo \
src/libpcre2_32_la-pcre2_study.lo \
@@ -320,9 +309,8 @@ am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \
src/pcre2_jit_compile.c src/pcre2_maketables.c \
src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \
src/pcre2_ord2utf.c src/pcre2_pattern_info.c \
- src/pcre2_script_run.c src/pcre2_serialize.c \
- src/pcre2_string_utils.c src/pcre2_study.c \
- src/pcre2_substitute.c src/pcre2_substring.c \
+ src/pcre2_serialize.c src/pcre2_string_utils.c \
+ src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \
src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \
src/pcre2_valid_utf.c src/pcre2_xclass.c
am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \
@@ -341,7 +329,6 @@ am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \
src/libpcre2_8_la-pcre2_newline.lo \
src/libpcre2_8_la-pcre2_ord2utf.lo \
src/libpcre2_8_la-pcre2_pattern_info.lo \
- src/libpcre2_8_la-pcre2_script_run.lo \
src/libpcre2_8_la-pcre2_serialize.lo \
src/libpcre2_8_la-pcre2_string_utils.lo \
src/libpcre2_8_la-pcre2_study.lo \
@@ -370,6 +357,11 @@ libpcre2_posix_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(libpcre2_posix_la_CFLAGS) $(CFLAGS) \
$(libpcre2_posix_la_LDFLAGS) $(LDFLAGS) -o $@
@WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_rpath = -rpath $(libdir)
+@WITH_PCRE2_8_TRUE@am__EXEEXT_1 = pcre2grep$(EXEEXT)
+@WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = dftables$(EXEEXT)
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__EXEEXT_3 = pcre2fuzzcheck$(EXEEXT)
+@WITH_JIT_TRUE@am__EXEEXT_4 = pcre2_jit_test$(EXEEXT)
+PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
am__dftables_SOURCES_DIST = src/dftables.c
@WITH_REBUILD_CHARTABLES_TRUE@am_dftables_OBJECTS = \
@WITH_REBUILD_CHARTABLES_TRUE@ src/dftables.$(OBJEXT)
@@ -382,8 +374,8 @@ pcre2_jit_test_OBJECTS = $(am_pcre2_jit_test_OBJECTS)
am__DEPENDENCIES_1 =
@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__DEPENDENCIES_2 = \
@WITH_GCOV_TRUE@@WITH_JIT_TRUE@ $(am__DEPENDENCIES_1)
-@WITH_JIT_TRUE@pcre2_jit_test_DEPENDENCIES = $(am__append_21) \
-@WITH_JIT_TRUE@ $(am__append_22) $(am__append_23) \
+@WITH_JIT_TRUE@pcre2_jit_test_DEPENDENCIES = $(am__append_19) \
+@WITH_JIT_TRUE@ $(am__append_20) $(am__append_21) \
@WITH_JIT_TRUE@ $(am__DEPENDENCIES_2)
pcre2_jit_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
@@ -392,10 +384,8 @@ pcre2_jit_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
am__pcre2fuzzcheck_SOURCES_DIST = src/pcre2_fuzzsupport.c
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am_pcre2fuzzcheck_OBJECTS = src/pcre2fuzzcheck-pcre2_fuzzsupport.$(OBJEXT)
pcre2fuzzcheck_OBJECTS = $(am_pcre2fuzzcheck_OBJECTS)
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1)
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_DEPENDENCIES = \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_3)
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la
pcre2fuzzcheck_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(pcre2fuzzcheck_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o \
@@ -404,19 +394,19 @@ am__pcre2grep_SOURCES_DIST = src/pcre2grep.c
@WITH_PCRE2_8_TRUE@am_pcre2grep_OBJECTS = \
@WITH_PCRE2_8_TRUE@ src/pcre2grep-pcre2grep.$(OBJEXT)
pcre2grep_OBJECTS = $(am_pcre2grep_OBJECTS)
-@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_4 = \
+@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_3 = \
@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_1)
@WITH_PCRE2_8_TRUE@pcre2grep_DEPENDENCIES = $(am__DEPENDENCIES_1) \
@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_1) libpcre2-8.la \
-@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_4)
+@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_3)
pcre2grep_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2grep_CFLAGS) \
$(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
am_pcre2test_OBJECTS = src/pcre2test-pcre2test.$(OBJEXT)
pcre2test_OBJECTS = $(am_pcre2test_OBJECTS)
-@WITH_GCOV_TRUE@am__DEPENDENCIES_5 = $(am__DEPENDENCIES_1)
-pcre2test_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__append_26) \
- $(am__append_27) $(am__append_28) $(am__DEPENDENCIES_5)
+@WITH_GCOV_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1)
+pcre2test_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__append_24) \
+ $(am__append_25) $(am__append_26) $(am__DEPENDENCIES_4)
pcre2test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2test_CFLAGS) \
$(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
@@ -436,95 +426,7 @@ am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src
depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__maybe_remake_depfiles = depfiles
-am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po \
- src/$(DEPDIR)/dftables.Po \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo \
- src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo \
- src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo \
- src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo \
- src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo \
- src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po \
- src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po \
- src/$(DEPDIR)/pcre2grep-pcre2grep.Po \
- src/$(DEPDIR)/pcre2test-pcre2test.Po
+am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@@ -865,6 +767,8 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PCRE2_DATE = @PCRE2_DATE@
+PCRE2_HAVE_INTTYPES_H = @PCRE2_HAVE_INTTYPES_H@
+PCRE2_HAVE_STDINT_H = @PCRE2_HAVE_STDINT_H@
PCRE2_MAJOR = @PCRE2_MAJOR@
PCRE2_MINOR = @PCRE2_MINOR@
PCRE2_PRERELEASE = @PCRE2_PRERELEASE@
@@ -1021,7 +925,6 @@ dist_html_DATA = \
doc/html/pcre2_set_parens_nest_limit.html \
doc/html/pcre2_set_recursion_limit.html \
doc/html/pcre2_set_recursion_memory_management.html \
- doc/html/pcre2_set_substitute_callout.html \
doc/html/pcre2_substitute.html \
doc/html/pcre2_substring_copy_byname.html \
doc/html/pcre2_substring_copy_bynumber.html \
@@ -1115,7 +1018,6 @@ dist_man_MANS = \
doc/pcre2_set_parens_nest_limit.3 \
doc/pcre2_set_recursion_limit.3 \
doc/pcre2_set_recursion_memory_management.3 \
- doc/pcre2_set_substitute_callout.3 \
doc/pcre2_substitute.3 \
doc/pcre2_substring_copy_byname.3 \
doc/pcre2_substring_copy_bynumber.3 \
@@ -1153,7 +1055,7 @@ dist_man_MANS = \
lib_LTLIBRARIES = $(am__append_2) $(am__append_3) $(am__append_4) \
$(am__append_11)
check_SCRIPTS =
-dist_noinst_SCRIPTS = RunTest $(am__append_34)
+dist_noinst_SCRIPTS = RunTest $(am__append_32)
# Additional files to delete on 'make clean', 'make distclean',
# and 'make maintainer-clean'.
@@ -1164,7 +1066,8 @@ CLEANFILES = src/pcre2_chartables.c testSinput test3input test3output \
test3outputA test3outputB testtry teststdout teststderr \
teststderrgrep testtemp1grep testtemp2grep testtrygrep \
testNinputgrep
-DISTCLEANFILES = src/config.h.in~ $(am__append_38)
+DISTCLEANFILES = src/config.h.in~ config.h pcre2.h.generic \
+ $(am__append_36)
MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic
# Additional files to bundle with the distribution, over and above what
@@ -1210,22 +1113,22 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \
src/sljit/sljitNativeX86_common.c \
src/sljit/sljitProtExecAllocator.c src/sljit/sljitUtils.c \
src/pcre2_jit_match.c src/pcre2_jit_misc.c \
- src/pcre2_printint.c RunTest.bat $(am__append_33) \
+ src/pcre2_printint.c RunTest.bat $(am__append_31) \
testdata/grepbinary testdata/grepfilelist testdata/grepinput \
testdata/grepinput3 testdata/grepinput8 testdata/grepinputM \
testdata/grepinputv testdata/grepinputx testdata/greplist \
testdata/grepoutput testdata/grepoutput8 testdata/grepoutputC \
- testdata/grepoutputCN testdata/grepoutputN testdata/greppatN4 \
- testdata/testinput1 testdata/testinput2 testdata/testinput3 \
- testdata/testinput4 testdata/testinput5 testdata/testinput6 \
- testdata/testinput7 testdata/testinput8 testdata/testinput9 \
- testdata/testinput10 testdata/testinput11 testdata/testinput12 \
- testdata/testinput13 testdata/testinput14 testdata/testinput15 \
- testdata/testinput16 testdata/testinput17 testdata/testinput18 \
- testdata/testinput19 testdata/testinput20 testdata/testinput21 \
- testdata/testinput22 testdata/testinput23 testdata/testinput24 \
- testdata/testinput25 testdata/testinputEBC \
- testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \
+ testdata/grepoutputN testdata/greppatN4 testdata/testinput1 \
+ testdata/testinput2 testdata/testinput3 testdata/testinput4 \
+ testdata/testinput5 testdata/testinput6 testdata/testinput7 \
+ testdata/testinput8 testdata/testinput9 testdata/testinput10 \
+ testdata/testinput11 testdata/testinput12 testdata/testinput13 \
+ testdata/testinput14 testdata/testinput15 testdata/testinput16 \
+ testdata/testinput17 testdata/testinput18 testdata/testinput19 \
+ testdata/testinput20 testdata/testinput21 testdata/testinput22 \
+ testdata/testinput23 testdata/testinput24 testdata/testinput25 \
+ testdata/testinputEBC testdata/testoutput1 \
+ testdata/testoutput2 testdata/testoutput3 \
testdata/testoutput3A testdata/testoutput3B \
testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \
testdata/testoutput7 testdata/testoutput8-16-2 \
@@ -1281,7 +1184,6 @@ COMMON_SOURCES = \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
- src/pcre2_script_run.c \
src/pcre2_serialize.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
@@ -1344,26 +1246,21 @@ COMMON_SOURCES = \
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS)
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_LIBADD =
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_CFLAGS = \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ -DSTANDALONE \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__append_17)
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_LDADD = \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la \
-@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__append_18)
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_LDADD = libpcre2-8.la
@WITH_JIT_TRUE@pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
-@WITH_JIT_TRUE@pcre2_jit_test_CFLAGS = $(AM_CFLAGS) $(am__append_24)
-@WITH_JIT_TRUE@pcre2_jit_test_LDADD = $(am__append_21) \
-@WITH_JIT_TRUE@ $(am__append_22) $(am__append_23) \
-@WITH_JIT_TRUE@ $(am__append_25)
+@WITH_JIT_TRUE@pcre2_jit_test_CFLAGS = $(AM_CFLAGS) $(am__append_22)
+@WITH_JIT_TRUE@pcre2_jit_test_LDADD = $(am__append_19) \
+@WITH_JIT_TRUE@ $(am__append_20) $(am__append_21) \
+@WITH_JIT_TRUE@ $(am__append_23)
pcre2test_SOURCES = src/pcre2test.c
-pcre2test_CFLAGS = $(AM_CFLAGS) $(am__append_29) $(am__append_30)
-pcre2test_LDADD = $(LIBREADLINE) $(am__append_26) $(am__append_27) \
- $(am__append_28) $(am__append_31)
+pcre2test_CFLAGS = $(AM_CFLAGS) $(am__append_27) $(am__append_28)
+pcre2test_LDADD = $(LIBREADLINE) $(am__append_24) $(am__append_25) \
+ $(am__append_26) $(am__append_29)
# We have .pc files for pkg-config users.
pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = $(am__append_35) $(am__append_36) $(am__append_37)
+pkgconfig_DATA = $(am__append_33) $(am__append_34) $(am__append_35)
# gcov/lcov code coverage reporting
#
@@ -1414,8 +1311,8 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
echo ' $(SHELL) ./config.status'; \
$(SHELL) ./config.status;; \
*) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
@@ -1453,67 +1350,25 @@ pcre2-config: $(top_builddir)/config.status $(srcdir)/pcre2-config.in
cd $(top_builddir) && $(SHELL) ./config.status $@
src/pcre2.h: $(top_builddir)/config.status $(top_srcdir)/src/pcre2.h.in
cd $(top_builddir) && $(SHELL) ./config.status $@
-install-binPROGRAMS: $(bin_PROGRAMS)
- @$(NORMAL_INSTALL)
- @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
- fi; \
- for p in $$list; do echo "$$p $$p"; done | \
- sed 's/$(EXEEXT)$$//' | \
- while read p p1; do if test -f $$p \
- || test -f $$p1 \
- ; then echo "$$p"; echo "$$p"; else :; fi; \
- done | \
- sed -e 'p;s,.*/,,;n;h' \
- -e 's|.*|.|' \
- -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
- sed 'N;N;N;s,\n, ,g' | \
- $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
- { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
- if ($$2 == $$4) files[d] = files[d] " " $$1; \
- else { print "f", $$3 "/" $$4, $$1; } } \
- END { for (d in files) print "f", d, files[d] }' | \
- while read type dir files; do \
- if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
- test -z "$$files" || { \
- echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
- $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
- } \
- ; done
-
-uninstall-binPROGRAMS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
- files=`for p in $$list; do echo "$$p"; done | \
- sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
- -e 's/$$/$(EXEEXT)/' \
- `; \
- test -n "$$list" || exit 0; \
- echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(bindir)" && rm -f $$files
-
-clean-binPROGRAMS:
- @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
- echo " rm -f" $$list; \
- rm -f $$list || exit $$?; \
- test -n "$(EXEEXT)" || exit 0; \
- list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
- echo " rm -f" $$list; \
- rm -f $$list
-
-clean-noinstPROGRAMS:
- @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
- echo " rm -f" $$list; \
- rm -f $$list || exit $$?; \
- test -n "$(EXEEXT)" || exit 0; \
- list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
- echo " rm -f" $$list; \
- rm -f $$list
clean-noinstLIBRARIES:
-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
+src/$(am__dirstamp):
+ @$(MKDIR_P) src
+ @: > src/$(am__dirstamp)
+src/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) src/$(DEPDIR)
+ @: > src/$(DEPDIR)/$(am__dirstamp)
+src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT): \
+ src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp)
+.libs/$(am__dirstamp):
+ @$(MKDIR_P) .libs
+ @: > .libs/$(am__dirstamp)
+
+.libs/libpcre2-fuzzsupport.a: $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_a_DEPENDENCIES) .libs/$(am__dirstamp)
+ $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport.a
+ $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_a_AR) .libs/libpcre2-fuzzsupport.a $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_LIBADD)
+ $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport.a
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@@ -1549,22 +1404,6 @@ clean-libLTLIBRARIES:
echo rm -f $${locs}; \
rm -f $${locs}; \
}
-src/$(am__dirstamp):
- @$(MKDIR_P) src
- @: > src/$(am__dirstamp)
-src/$(DEPDIR)/$(am__dirstamp):
- @$(MKDIR_P) src/$(DEPDIR)
- @: > src/$(DEPDIR)/$(am__dirstamp)
-src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT): \
- src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp)
-.libs/$(am__dirstamp):
- @$(MKDIR_P) .libs
- @: > .libs/$(am__dirstamp)
-
-.libs/libpcre2-fuzzsupport.a: $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_a_DEPENDENCIES) .libs/$(am__dirstamp)
- $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport.a
- $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_a_AR) .libs/libpcre2-fuzzsupport.a $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_LIBADD)
- $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport.a
src/libpcre2_16_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_16_la-pcre2_compile.lo: src/$(am__dirstamp) \
@@ -1597,8 +1436,6 @@ src/libpcre2_16_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_16_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
-src/libpcre2_16_la-pcre2_script_run.lo: src/$(am__dirstamp) \
- src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_16_la-pcre2_serialize.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_16_la-pcre2_string_utils.lo: src/$(am__dirstamp) \
@@ -1654,8 +1491,6 @@ src/libpcre2_32_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_32_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
-src/libpcre2_32_la-pcre2_script_run.lo: src/$(am__dirstamp) \
- src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_32_la-pcre2_serialize.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_32_la-pcre2_string_utils.lo: src/$(am__dirstamp) \
@@ -1711,8 +1546,6 @@ src/libpcre2_8_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_8_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
-src/libpcre2_8_la-pcre2_script_run.lo: src/$(am__dirstamp) \
- src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_8_la-pcre2_serialize.lo: src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
src/libpcre2_8_la-pcre2_string_utils.lo: src/$(am__dirstamp) \
@@ -1741,6 +1574,64 @@ src/libpcre2_posix_la-pcre2posix.lo: src/$(am__dirstamp) \
libpcre2-posix.la: $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_DEPENDENCIES) $(EXTRA_libpcre2_posix_la_DEPENDENCIES)
$(AM_V_CCLD)$(libpcre2_posix_la_LINK) $(am_libpcre2_posix_la_rpath) $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_LIBADD) $(LIBS)
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+ fi; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p \
+ || test -f $$p1 \
+ ; then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' \
+ -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' \
+ `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
src/dftables.$(OBJEXT): src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
@@ -1815,100 +1706,91 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/dftables.Po@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2grep-pcre2grep.Po@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2test-pcre2test.Po@am__quote@ # am--include-marker
-
-$(am__depfiles_remade):
- @$(MKDIR_P) $(@D)
- @echo '# dummy' >$@-t && $(am__mv) $@-t $@
-
-am--depfiles: $(am__depfiles_remade)
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/dftables.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2grep-pcre2grep.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2test-pcre2test.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
@@ -2060,13 +1942,6 @@ src/libpcre2_16_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c
-src/libpcre2_16_la-pcre2_script_run.lo: src/pcre2_script_run.c
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Tpo -c -o src/libpcre2_16_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_16_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c
-
src/libpcre2_16_la-pcre2_serialize.lo: src/pcre2_serialize.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Tpo -c -o src/libpcre2_16_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo
@@ -2249,13 +2124,6 @@ src/libpcre2_32_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c
-src/libpcre2_32_la-pcre2_script_run.lo: src/pcre2_script_run.c
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Tpo -c -o src/libpcre2_32_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_32_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c
-
src/libpcre2_32_la-pcre2_serialize.lo: src/pcre2_serialize.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Tpo -c -o src/libpcre2_32_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo
@@ -2438,13 +2306,6 @@ src/libpcre2_8_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c
-src/libpcre2_8_la-pcre2_script_run.lo: src/pcre2_script_run.c
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Tpo -c -o src/libpcre2_8_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_8_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c
-
src/libpcre2_8_la-pcre2_serialize.lo: src/pcre2_serialize.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Tpo -c -o src/libpcre2_8_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo
@@ -2958,7 +2819,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
fi; \
$$success || exit 1
-check-TESTS: $(check_SCRIPTS)
+check-TESTS:
@list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list
@list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
@@ -3015,10 +2876,7 @@ RunGrepTest.log: RunGrepTest
@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT)
-distdir: $(BUILT_SOURCES)
- $(MAKE) $(AM_MAKEFLAGS) distdir-am
-
-distdir-am: $(DISTFILES)
+distdir: $(DISTFILES)
$(am__remove_distdir)
test -d "$(distdir)" || mkdir "$(distdir)"
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
@@ -3184,12 +3042,12 @@ check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) check-TESTS
check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-am
-all-am: Makefile $(PROGRAMS) $(LIBRARIES) $(LTLIBRARIES) $(SCRIPTS) \
+all-am: Makefile $(LIBRARIES) $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) \
$(MANS) $(DATA) $(HEADERS)
install-binPROGRAMS: install-libLTLIBRARIES
installdirs:
- for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \
+ for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: $(BUILT_SOURCES)
@@ -3242,94 +3100,7 @@ clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \
distclean: distclean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po
- -rm -f src/$(DEPDIR)/dftables.Po
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo
- -rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo
- -rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po
- -rm -f src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po
- -rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po
- -rm -f src/$(DEPDIR)/pcre2test-pcre2test.Po
+ -rm -rf src/$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-hdr distclean-libtool distclean-local distclean-tags
@@ -3380,94 +3151,7 @@ installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -rf $(top_srcdir)/autom4te.cache
- -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po
- -rm -f src/$(DEPDIR)/dftables.Po
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo
- -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo
- -rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo
- -rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po
- -rm -f src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po
- -rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po
- -rm -f src/$(DEPDIR)/pcre2test-pcre2test.Po
+ -rm -rf src/$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
@@ -3494,9 +3178,9 @@ uninstall-man: uninstall-man1 uninstall-man3
.MAKE: all check check-am install install-am install-strip
-.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles am--refresh check \
- check-TESTS check-am clean clean-binPROGRAMS clean-cscope \
- clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
+.PHONY: CTAGS GTAGS TAGS all all-am am--refresh check check-TESTS \
+ check-am clean clean-binPROGRAMS clean-cscope clean-generic \
+ clean-libLTLIBRARIES clean-libtool clean-local \
clean-noinstLIBRARIES clean-noinstPROGRAMS cscope \
cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \
dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \
diff --git a/dist2/NEWS b/dist2/NEWS
index bd0df866..94345b3c 100644
--- a/dist2/NEWS
+++ b/dist2/NEWS
@@ -2,54 +2,6 @@ News about PCRE2 releases
-------------------------
-Version 10.33-RC1 16-April-2019
--------------------------------
-
-Yet more bugfixes, tidies, and a few enhancements, summarized here (see
-ChangeLog for the full list):
-
-1. Callouts from pcre2_substitute() are now available.
-
-2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
-functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
-names are defined as macros. This should help avoid linking with the wrong
-library in some environments, while still exporting the POSIX names for
-pre-existing programs that use them.
-
-3. Some new options:
-
- (a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n.
-
- (b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...}
- construct.
-
- (c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be
- made, instead of just remembering a pointer.
-
-4. Some new Perl features:
-
- (a) Perl 5.28's experimental alphabetic names for atomic groups and
- lookaround assertions, for example, (*pla:...) and (*atomic:...).
-
- (b) The new Perl "script run" features (*script_run:...) and
- (*atomic_script_run:...) aka (*sr:...) and (*asr:...).
-
- (c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in
- capture group names.
-
-5. --disable-percent-zt disables the use of %zu and %td in formatting strings
-in pcre2test. They were already automatically disabled for VC and older C
-compilers.
-
-6. Some changes related to callouts in pcre2grep:
-
- (a) Support for running an external program under VMS has been added, in
- addition to Windows and fork() support.
-
- (b) --disable-pcre2grep-callout-fork restricts the callout support in
- to the inbuilt echo facility.
-
-
Version 10.32 10-September-2018
-------------------------------
diff --git a/dist2/NON-AUTOTOOLS-BUILD b/dist2/NON-AUTOTOOLS-BUILD
index 39e7620a..b742ed34 100644
--- a/dist2/NON-AUTOTOOLS-BUILD
+++ b/dist2/NON-AUTOTOOLS-BUILD
@@ -48,7 +48,7 @@ can skip ahead to the CMake section.
macro to specify what character(s) you want to be interpreted as line
terminators by default.
- When you subsequently compile any of the PCRE2 modules, you must specify
+ When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
@@ -62,11 +62,6 @@ can skip ahead to the CMake section.
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
- Note also that the src/config.h.generic file is created from a config.h
- that was generated by Autotools, which automatically includes settings of
- a number of macros that are not actually used by PCRE2 (for example,
- HAVE_MEMORY_H).
-
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
@@ -109,7 +104,6 @@ can skip ahead to the CMake section.
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
- pcre2_script_run.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
@@ -401,6 +395,6 @@ Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx languages.
-==============================
-Last Updated: 14 November 2018
-==============================
+===========================
+Last Updated: 19 April 2018
+===========================
diff --git a/dist2/README b/dist2/README
index ff9a6af6..2eb621b0 100644
--- a/dist2/README
+++ b/dist2/README
@@ -1,11 +1,9 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
-PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
-API. Since its initial release in 2015, there has been further development of
-the code and it now differs from PCRE1 in more than just the API. There are new
-features and the internals have been improved. The latest release of PCRE2 is
-always available in three alternative formats from:
+PCRE2 is a re-working of the original PCRE library to provide an entirely new
+API. The latest release of PCRE2 is always available in three alternative
+formats from:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
@@ -41,13 +39,13 @@ The PCRE2 APIs
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
-32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
-are no C++ wrappers.
+32-bit library, which processes strings of 32-bit values. There are no C++
+wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
-man page). These are built into a library called libpcre2-posix. Note that this
-just provides a POSIX calling interface to PCRE2; the regular expressions
+man page). These can be found in a library called libpcre2-posix. Note that
+this just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
@@ -55,8 +53,20 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
-renamed or pointed at by a link (or the program modified, of course). See the
-pcre2posix documentation for more details.
+renamed or pointed at by a link.
+
+If you are using the POSIX interface to PCRE2 and there is already a POSIX
+regex library installed on your system, as well as worrying about the regex.h
+header file (as mentioned above), you must also take care when linking programs
+to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they
+may pick up the POSIX functions of the same name from the other library.
+
+One way of avoiding this confusion is to compile PCRE2 with the addition of
+-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
+compiler flags (CFLAGS if you are using "configure" -- see below). This has the
+effect of renaming the functions so that the names no longer clash. Of course,
+you have to do the same thing for your applications, or write them using the
+new names.
Documentation for PCRE2
@@ -313,11 +323,7 @@ library. They are also documented in the pcre2build man page.
. There is support for calling external programs during matching in the
pcre2grep command, using PCRE2's callout facility with string arguments. This
support can be disabled by adding --disable-pcre2grep-callout to the
- "configure" command. There are two kinds of callout: one that generates
- output from inbuilt code, and another that calls an external program. The
- latter has special support for Windows and VMS; otherwise it assumes the
- existence of the fork() function. This facility can be disabled by adding
- --disable-pcre2grep-callout-fork to the "configure" command.
+ "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
@@ -370,15 +376,6 @@ library. They are also documented in the pcre2build man page.
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
should fix it.
-. The C99 standard defines formatting modifiers z and t for size_t and
- ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
- environments other than Microsoft Visual Studio when __STDC_VERSION__ is
- defined and has a value greater than or equal to 199901L (indicating C99).
- However, there is at least one environment that claims to be C99 but does not
- support these modifiers. If --disable-percent-zt is specified, no use is made
- of the z or t modifiers. Instead or %td or %zu, %lu is used, with a cast for
- size_t values.
-
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
@@ -792,7 +789,6 @@ The distribution should contain the files listed below.
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
- src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
@@ -892,4 +888,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 16 April 2019
+Last updated: 17 June 2018
diff --git a/dist2/RunGrepTest b/dist2/RunGrepTest
index bac1f1b2..74ff4c13 100755
--- a/dist2/RunGrepTest
+++ b/dist2/RunGrepTest
@@ -170,11 +170,11 @@ echo "---------------------------- Test 14 -----------------------------" >>test
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
@@ -296,7 +296,7 @@ echo "---------------------------- Test 45 ------------------------------" >>tes
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
@@ -714,46 +714,34 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-# It seems impossible to handle NUL characters easily in many operating
-# systems, including Solaris (aka SunOS), where the version of sed explicitly
-# doesn't like them, and also MacOS (Darwin), OpenBSD, FreeBSD, and NetBSD. So
-# now we run this test only on OS that are known to work. For the rest, we
-# fudge the output so that the comparison works.
+# It seems impossible to handle NUL characters easily in Solaris (aka SunOS).
+# The version of sed explicitly doesn't like them. For the moment, we just
+# don't run this test under SunOS. Fudge the output so that the comparison
+# works. A similar problem has also been reported for MacOS (Darwin).
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
uname=`uname`
-case $uname in
- Linux)
- printf 'abc\0def' >testNinputgrep
- $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
- echo "" >>testtrygrep
- ;;
- *)
- echo '1:abcZERO2:def' >>testtrygrep
- ;;
-esac
+if [ "$uname" != "SunOS" -a "$uname" != "Darwin" ] ; then
+ printf 'abc\0def' >testNinputgrep
+ $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
+ echo "" >>testtrygrep
+else
+ echo '1:abcZERO2:def' >>testtrygrep
+fi
$cf $srcdir/testdata/grepoutputN testtrygrep
if [ $? != 0 ] ; then exit 1; fi
-# If pcre2grep supports script callouts, run some tests on them. It is possible
-# to restrict these callouts to the non-fork case, either for security, or for
-# environments that do not support fork(). This is handled by comparing to a
-# different output.
+# If pcre2grep supports script callouts, run some tests on them.
-if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then
+if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scripts in patterns are supported'; then
echo "Testing pcre2grep script callouts"
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
-
- if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
- $cf $srcdir/testdata/grepoutputCN testtrygrep
- else
- $cf $srcdir/testdata/grepoutputC testtrygrep
- fi
-
+ # The above has no newline, which 'diff -ub' ignores, so add one.
+ $cf $srcdir/testdata/grepoutputC testtrygrep
if [ $? != 0 ] ; then exit 1; fi
else
echo "Script callouts are not supported"
diff --git a/dist2/RunGrepTest.bat b/dist2/RunGrepTest.bat
index 4a095a36..50a96448 100644
--- a/dist2/RunGrepTest.bat
+++ b/dist2/RunGrepTest.bat
@@ -653,19 +653,14 @@ if ERRORLEVEL 1 exit /b 1
:: If pcre2grep supports script callouts, run some tests on them.
-%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
+%pcre2grep% --help | %pcre2grep% -q "Callout scripts in patterns are supported"
if %ERRORLEVEL% equ 0 (
echo Testing pcre2grep script callouts
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
- %pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported"
- if %ERRORLEVEL% equ 0 (
- %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
- ) else (
- %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
- )
+ %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
if ERRORLEVEL 1 exit /b 1
) else (
echo Script callouts are not supported
diff --git a/dist2/RunTest b/dist2/RunTest
index 6715d153..39f04d40 100755
--- a/dist2/RunTest
+++ b/dist2/RunTest
@@ -319,7 +319,7 @@ fi
# If it is possible to set the system stack size and -bigstack was given,
# set up a large stack.
-$sim ./pcre2test -S 64 /dev/null /dev/null
+$sim ./pcre2test -S 1 /dev/null /dev/null
if [ $? -eq 0 -a "$bigstack" != "" ] ; then
setstack="-S 64"
else
diff --git a/dist2/aclocal.m4 b/dist2/aclocal.m4
index 87e34b08..cc10b262 100644
--- a/dist2/aclocal.m4
+++ b/dist2/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -364,7 +364,7 @@ AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
[AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])])
])dnl PKG_HAVE_DEFINE_WITH_MODULES
-# Copyright (C) 2002-2018 Free Software Foundation, Inc.
+# Copyright (C) 2002-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -376,10 +376,10 @@ AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
-[am__api_version='1.16'
+[am__api_version='1.15'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.16.1], [],
+m4_if([$1], [1.15.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@@ -395,12 +395,12 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.16.1])dnl
+[AM_AUTOMAKE_VERSION([1.15.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
-# Copyright (C) 2011-2018 Free Software Foundation, Inc.
+# Copyright (C) 2011-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -462,7 +462,7 @@ AC_SUBST([AR])dnl
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -514,7 +514,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
# AM_CONDITIONAL -*- Autoconf -*-
-# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -545,7 +545,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -736,12 +736,13 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+
# _AM_OUTPUT_DEPENDENCY_COMMANDS
# ------------------------------
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
@@ -749,41 +750,49 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
# Older Autoconf quotes --file arguments for eval, but not when files
# are listed without --file. Let's play safe and only enable the eval
# if we detect the quoting.
- # TODO: see whether this extra hack can be removed once we start
- # requiring Autoconf 2.70 or later.
- AS_CASE([$CONFIG_FILES],
- [*\'*], [eval set x "$CONFIG_FILES"],
- [*], [set x $CONFIG_FILES])
+ case $CONFIG_FILES in
+ *\'*) eval set x "$CONFIG_FILES" ;;
+ *) set x $CONFIG_FILES ;;
+ esac
shift
- # Used to flag and report bootstrapping failures.
- am_rc=0
- for am_mf
+ for mf
do
# Strip MF so we end up with the name of the file.
- am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile which includes
- # dependency-tracking related rules and includes.
- # Grep'ing the whole file directly is not great: AIX grep has a line
+ mf=`echo "$mf" | sed -e 's/:.*$//'`
+ # Check whether this is an Automake generated Makefile or not.
+ # We used to match only the files named 'Makefile.in', but
+ # some people rename them; so instead we look at the file content.
+ # Grep'ing the first line is not enough: some people post-process
+ # each Makefile.in and add a new line on top of each file to say so.
+ # Grep'ing the whole file is not good either: AIX grep has a line
# limit of 2048, but all sed's we know have understand at least 4000.
- sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
- || continue
- am_dirpart=`AS_DIRNAME(["$am_mf"])`
- am_filepart=`AS_BASENAME(["$am_mf"])`
- AM_RUN_LOG([cd "$am_dirpart" \
- && sed -e '/# am--include-marker/d' "$am_filepart" \
- | $MAKE -f - am--depfiles]) || am_rc=$?
+ if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
+ dirpart=`AS_DIRNAME("$mf")`
+ else
+ continue
+ fi
+ # Extract the definition of DEPDIR, am__include, and am__quote
+ # from the Makefile without running 'make'.
+ DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
+ test -z "$DEPDIR" && continue
+ am__include=`sed -n 's/^am__include = //p' < "$mf"`
+ test -z "$am__include" && continue
+ am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
+ # Find all dependency output files, they are included files with
+ # $(DEPDIR) in their names. We invoke sed twice because it is the
+ # simplest approach to changing $(DEPDIR) to its actual value in the
+ # expansion.
+ for file in `sed -n "
+ s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
+ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
+ # Make sure the directory exists.
+ test -f "$dirpart/$file" && continue
+ fdir=`AS_DIRNAME(["$file"])`
+ AS_MKDIR_P([$dirpart/$fdir])
+ # echo "creating $dirpart/$file"
+ echo '# dummy' > "$dirpart/$file"
+ done
done
- if test $am_rc -ne 0; then
- AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
- for automatic dependency tracking. Try re-running configure with the
- '--disable-dependency-tracking' option to at least be able to build
- the package (albeit without support for automatic dependency tracking).])
- fi
- AS_UNSET([am_dirpart])
- AS_UNSET([am_filepart])
- AS_UNSET([am_mf])
- AS_UNSET([am_rc])
- rm -f conftest-deps.mk
}
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
@@ -792,17 +801,18 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
# -----------------------------
# This macro should only be invoked once -- use via AC_REQUIRE.
#
-# This code is only required when automatic dependency tracking is enabled.
-# This creates each '.Po' and '.Plo' makefile fragment that we'll need in
-# order to bootstrap the dependency handling code.
+# This code is only required when automatic dependency tracking
+# is enabled. FIXME. This creates each '.P' file that we will
+# need in order to bootstrap the dependency handling code.
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
[AC_CONFIG_COMMANDS([depfiles],
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
- [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
+ [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
+])
# Do all the work for Automake. -*- Autoconf -*-
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -889,8 +899,8 @@ AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
# For better backward compatibility. To be removed once Automake 1.9.x
# dies out for good. For more background, see:
-# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
-# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
# We need awk for the "check" target (and possibly the TAP driver). The
# system "awk" is bad on some platforms.
@@ -957,7 +967,7 @@ END
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
-that behaves properly: <https://www.gnu.org/software/coreutils/>.
+that behaves properly: <http://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
@@ -999,7 +1009,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1020,7 +1030,7 @@ if test x"${install_sh+set}" != xset; then
fi
AC_SUBST([install_sh])])
-# Copyright (C) 2003-2018 Free Software Foundation, Inc.
+# Copyright (C) 2003-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1041,7 +1051,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1049,42 +1059,49 @@ AC_SUBST([am__leading_dot])])
# AM_MAKE_INCLUDE()
# -----------------
-# Check whether make has an 'include' directive that can support all
-# the idioms we need for our automatic dependency tracking code.
+# Check to see how make treats includes.
AC_DEFUN([AM_MAKE_INCLUDE],
-[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
-cat > confinc.mk << 'END'
+[am_make=${MAKE-make}
+cat > confinc << 'END'
am__doit:
- @echo this is the am__doit target >confinc.out
+ @echo this is the am__doit target
.PHONY: am__doit
END
+# If we don't find an include directive, just comment out the code.
+AC_MSG_CHECKING([for style of include used by $am_make])
am__include="#"
am__quote=
-# BSD make does it like this.
-echo '.include "confinc.mk" # ignored' > confmf.BSD
-# Other make implementations (GNU, Solaris 10, AIX) do it like this.
-echo 'include confinc.mk # ignored' > confmf.GNU
-_am_result=no
-for s in GNU BSD; do
- AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
- AS_CASE([$?:`cat confinc.out 2>/dev/null`],
- ['0:this is the am__doit target'],
- [AS_CASE([$s],
- [BSD], [am__include='.include' am__quote='"'],
- [am__include='include' am__quote=''])])
- if test "$am__include" != "#"; then
- _am_result="yes ($s style)"
- break
- fi
-done
-rm -f confinc.* confmf.*
-AC_MSG_RESULT([${_am_result}])
-AC_SUBST([am__include])])
-AC_SUBST([am__quote])])
+_am_result=none
+# First try GNU make style include.
+echo "include confinc" > confmf
+# Ignore all kinds of additional output from 'make'.
+case `$am_make -s -f confmf 2> /dev/null` in #(
+*the\ am__doit\ target*)
+ am__include=include
+ am__quote=
+ _am_result=GNU
+ ;;
+esac
+# Now try BSD make style include.
+if test "$am__include" = "#"; then
+ echo '.include "confinc"' > confmf
+ case `$am_make -s -f confmf 2> /dev/null` in #(
+ *the\ am__doit\ target*)
+ am__include=.include
+ am__quote="\""
+ _am_result=BSD
+ ;;
+ esac
+fi
+AC_SUBST([am__include])
+AC_SUBST([am__quote])
+AC_MSG_RESULT([$_am_result])
+rm -f confinc confmf
+])
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1123,7 +1140,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1152,7 +1169,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1199,7 +1216,7 @@ AC_LANG_POP([C])])
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1218,7 +1235,7 @@ AC_DEFUN([AM_RUN_LOG],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1299,7 +1316,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
-# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+# Copyright (C) 2009-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1359,7 +1376,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1387,7 +1404,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+# Copyright (C) 2006-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1406,7 +1423,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+# Copyright (C) 2004-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/dist2/ar-lib b/dist2/ar-lib
index 0baa4f60..05094d34 100755
--- a/dist2/ar-lib
+++ b/dist2/ar-lib
@@ -4,7 +4,7 @@
me=ar-lib
scriptversion=2012-03-01.08; # UTC
-# Copyright (C) 2010-2018 Free Software Foundation, Inc.
+# Copyright (C) 2010-2017 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
@@ -18,7 +18,7 @@ scriptversion=2012-03-01.08; # UTC
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
diff --git a/dist2/compile b/dist2/compile
index 99e50524..2ab71e4e 100755
--- a/dist2/compile
+++ b/dist2/compile
@@ -1,9 +1,9 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
-scriptversion=2018-03-07.03; # UTC
+scriptversion=2016-01-11.22; # UTC
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
@@ -17,7 +17,7 @@ scriptversion=2018-03-07.03; # UTC
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -340,7 +340,7 @@ exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
diff --git a/dist2/config-cmake.h.in b/dist2/config-cmake.h.in
index 529b0093..9b4f15da 100644
--- a/dist2/config-cmake.h.in
+++ b/dist2/config-cmake.h.in
@@ -18,7 +18,6 @@
#cmakedefine SUPPORT_PCRE2_16 1
#cmakedefine SUPPORT_PCRE2_32 1
#cmakedefine PCRE2_DEBUG 1
-#cmakedefine DISABLE_PERCENT_ZT 1
#cmakedefine SUPPORT_LIBBZ2 1
#cmakedefine SUPPORT_LIBEDIT 1
@@ -28,8 +27,6 @@
#cmakedefine SUPPORT_JIT 1
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
-#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
-#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
#cmakedefine SUPPORT_UNICODE 1
#cmakedefine SUPPORT_VALGRIND 1
diff --git a/dist2/config.guess b/dist2/config.guess
index 256083a7..2193702b 100755
--- a/dist2/config.guess
+++ b/dist2/config.guess
@@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
-# Copyright 1992-2018 Free Software Foundation, Inc.
+# Copyright 1992-2017 Free Software Foundation, Inc.
-timestamp='2018-03-08'
+timestamp='2017-05-27'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -15,7 +15,7 @@ timestamp='2018-03-08'
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -27,7 +27,7 @@ timestamp='2018-03-08'
# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
#
# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
#
# Please send patches to <config-patches@gnu.org>.
@@ -39,7 +39,7 @@ Usage: $0 [OPTION]
Output the configuration name of the system \`$me' is run on.
-Options:
+Operation modes:
-h, --help print this help, then exit
-t, --time-stamp print date of last modification, then exit
-v, --version print version number, then exit
@@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright 1992-2018 Free Software Foundation, Inc.
+Copyright 1992-2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -107,9 +107,9 @@ trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
dummy=$tmp/dummy ;
tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
case $CC_FOR_BUILD,$HOST_CC,$CC in
- ,,) echo "int x;" > "$dummy.c" ;
+ ,,) echo "int x;" > $dummy.c ;
for c in cc gcc c89 c99 ; do
- if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+ if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
CC_FOR_BUILD="$c"; break ;
fi ;
done ;
@@ -132,14 +132,14 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
-case "$UNAME_SYSTEM" in
+case "${UNAME_SYSTEM}" in
Linux|GNU|GNU/*)
# If the system lacks a compiler, then just pick glibc.
# We could probably try harder.
LIBC=gnu
- eval "$set_cc_for_build"
- cat <<-EOF > "$dummy.c"
+ eval $set_cc_for_build
+ cat <<-EOF > $dummy.c
#include <features.h>
#if defined(__UCLIBC__)
LIBC=uclibc
@@ -149,20 +149,13 @@ Linux|GNU|GNU/*)
LIBC=gnu
#endif
EOF
- eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`"
-
- # If ldd exists, use it to detect musl libc.
- if command -v ldd >/dev/null && \
- ldd --version 2>&1 | grep -q ^musl
- then
- LIBC=musl
- fi
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
;;
esac
# Note: order is significant - the case branches are not exclusive.
-case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or
# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
@@ -176,30 +169,30 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
# portion of the name. We always set it to "unknown".
sysctl="sysctl -n hw.machine_arch"
UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
- "/sbin/$sysctl" 2>/dev/null || \
- "/usr/sbin/$sysctl" 2>/dev/null || \
+ /sbin/$sysctl 2>/dev/null || \
+ /usr/sbin/$sysctl 2>/dev/null || \
echo unknown)`
- case "$UNAME_MACHINE_ARCH" in
+ case "${UNAME_MACHINE_ARCH}" in
armeb) machine=armeb-unknown ;;
arm*) machine=arm-unknown ;;
sh3el) machine=shl-unknown ;;
sh3eb) machine=sh-unknown ;;
sh5el) machine=sh5le-unknown ;;
earmv*)
- arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
- endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
- machine="${arch}${endian}"-unknown
+ arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
+ endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'`
+ machine=${arch}${endian}-unknown
;;
- *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
+ *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
esac
# The Operating System including object format, if it has switched
# to ELF recently (or will in the future) and ABI.
- case "$UNAME_MACHINE_ARCH" in
+ case "${UNAME_MACHINE_ARCH}" in
earm*)
os=netbsdelf
;;
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
- eval "$set_cc_for_build"
+ eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ELF__
then
@@ -215,10 +208,10 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
;;
esac
# Determine ABI tags.
- case "$UNAME_MACHINE_ARCH" in
+ case "${UNAME_MACHINE_ARCH}" in
earm*)
expr='s/^earmv[0-9]/-eabi/;s/eb$//'
- abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
+ abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"`
;;
esac
# The OS release
@@ -226,55 +219,46 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
# thus, need a distinct triplet. However, they do not need
# kernel version information, so it can be replaced with a
# suitable tag, in the style of linux-gnu.
- case "$UNAME_VERSION" in
+ case "${UNAME_VERSION}" in
Debian*)
release='-gnu'
;;
*)
- release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
+ release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2`
;;
esac
# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
# contains redundant information, the shorter form:
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
- echo "$machine-${os}${release}${abi}"
+ echo "${machine}-${os}${release}${abi}"
exit ;;
*:Bitrig:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
- echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
exit ;;
*:OpenBSD:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
- echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
exit ;;
*:LibertyBSD:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
- echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
- exit ;;
- *:MidnightBSD:*:*)
- echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE}
exit ;;
*:ekkoBSD:*:*)
- echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
exit ;;
*:SolidBSD:*:*)
- echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
exit ;;
macppc:MirBSD:*:*)
- echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
+ echo powerpc-unknown-mirbsd${UNAME_RELEASE}
exit ;;
*:MirBSD:*:*)
- echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
exit ;;
*:Sortix:*:*)
- echo "$UNAME_MACHINE"-unknown-sortix
- exit ;;
- *:Redox:*:*)
- echo "$UNAME_MACHINE"-unknown-redox
+ echo ${UNAME_MACHINE}-unknown-sortix
exit ;;
- mips:OSF1:*.*)
- echo mips-dec-osf1
- exit ;;
alpha:OSF1:*:*)
case $UNAME_RELEASE in
*4.0)
@@ -326,19 +310,28 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
# A Tn.n version is a released field test version.
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
- echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`"
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
exit $exitcode ;;
+ Alpha\ *:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # Should we change UNAME_MACHINE based on the output of uname instead
+ # of the specific Alpha model?
+ echo alpha-pc-interix
+ exit ;;
+ 21064:Windows_NT:50:3)
+ echo alpha-dec-winnt3.5
+ exit ;;
Amiga*:UNIX_System_V:4.0:*)
echo m68k-unknown-sysv4
exit ;;
*:[Aa]miga[Oo][Ss]:*:*)
- echo "$UNAME_MACHINE"-unknown-amigaos
+ echo ${UNAME_MACHINE}-unknown-amigaos
exit ;;
*:[Mm]orph[Oo][Ss]:*:*)
- echo "$UNAME_MACHINE"-unknown-morphos
+ echo ${UNAME_MACHINE}-unknown-morphos
exit ;;
*:OS/390:*:*)
echo i370-ibm-openedition
@@ -350,7 +343,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
echo powerpc-ibm-os400
exit ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
- echo arm-acorn-riscix"$UNAME_RELEASE"
+ echo arm-acorn-riscix${UNAME_RELEASE}
exit ;;
arm*:riscos:*:*|arm*:RISCOS:*:*)
echo arm-unknown-riscos
@@ -377,19 +370,19 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
sparc) echo sparc-icl-nx7; exit ;;
esac ;;
s390x:SunOS:*:*)
- echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+ echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4H:SunOS:5.*:*)
- echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+ echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
- echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`"
+ echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
- echo i386-pc-auroraux"$UNAME_RELEASE"
+ echo i386-pc-auroraux${UNAME_RELEASE}
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
- eval "$set_cc_for_build"
+ eval $set_cc_for_build
SUN_ARCH=i386
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
@@ -402,13 +395,13 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
SUN_ARCH=x86_64
fi
fi
- echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+ echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4*:SunOS:6*:*)
# According to config.sub, this is the proper way to canonicalize
# SunOS6. Hard to guess exactly what SunOS6 will be like, but
# it's likely to be more like Solaris than SunOS4.
- echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+ echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4*:SunOS:*:*)
case "`/usr/bin/arch -k`" in
@@ -417,25 +410,25 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
;;
esac
# Japanese Language versions have a version number like `4.1.3-JL'.
- echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`"
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
exit ;;
sun3*:SunOS:*:*)
- echo m68k-sun-sunos"$UNAME_RELEASE"
+ echo m68k-sun-sunos${UNAME_RELEASE}
exit ;;
sun*:*:4.2BSD:*)
UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
- test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
+ test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3
case "`/bin/arch`" in
sun3)
- echo m68k-sun-sunos"$UNAME_RELEASE"
+ echo m68k-sun-sunos${UNAME_RELEASE}
;;
sun4)
- echo sparc-sun-sunos"$UNAME_RELEASE"
+ echo sparc-sun-sunos${UNAME_RELEASE}
;;
esac
exit ;;
aushp:SunOS:*:*)
- echo sparc-auspex-sunos"$UNAME_RELEASE"
+ echo sparc-auspex-sunos${UNAME_RELEASE}
exit ;;
# The situation for MiNT is a little confusing. The machine name
# can be virtually everything (everything which is not
@@ -446,44 +439,44 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
# MiNT. But MiNT is downward compatible to TOS, so this should
# be no problem.
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint"$UNAME_RELEASE"
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint"$UNAME_RELEASE"
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
- echo m68k-atari-mint"$UNAME_RELEASE"
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
- echo m68k-milan-mint"$UNAME_RELEASE"
+ echo m68k-milan-mint${UNAME_RELEASE}
exit ;;
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
- echo m68k-hades-mint"$UNAME_RELEASE"
+ echo m68k-hades-mint${UNAME_RELEASE}
exit ;;
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
- echo m68k-unknown-mint"$UNAME_RELEASE"
+ echo m68k-unknown-mint${UNAME_RELEASE}
exit ;;
m68k:machten:*:*)
- echo m68k-apple-machten"$UNAME_RELEASE"
+ echo m68k-apple-machten${UNAME_RELEASE}
exit ;;
powerpc:machten:*:*)
- echo powerpc-apple-machten"$UNAME_RELEASE"
+ echo powerpc-apple-machten${UNAME_RELEASE}
exit ;;
RISC*:Mach:*:*)
echo mips-dec-mach_bsd4.3
exit ;;
RISC*:ULTRIX:*:*)
- echo mips-dec-ultrix"$UNAME_RELEASE"
+ echo mips-dec-ultrix${UNAME_RELEASE}
exit ;;
VAX*:ULTRIX*:*:*)
- echo vax-dec-ultrix"$UNAME_RELEASE"
+ echo vax-dec-ultrix${UNAME_RELEASE}
exit ;;
2020:CLIX:*:* | 2430:CLIX:*:*)
- echo clipper-intergraph-clix"$UNAME_RELEASE"
+ echo clipper-intergraph-clix${UNAME_RELEASE}
exit ;;
mips:*:*:UMIPS | mips:*:*:RISCos)
- eval "$set_cc_for_build"
- sed 's/^ //' << EOF > "$dummy.c"
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
#ifdef __cplusplus
#include <stdio.h> /* for printf() prototype */
int main (int argc, char *argv[]) {
@@ -492,23 +485,23 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
#endif
#if defined (host_mips) && defined (MIPSEB)
#if defined (SYSTYPE_SYSV)
- printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
+ printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
#endif
#if defined (SYSTYPE_SVR4)
- printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
+ printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
#endif
#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
- printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
+ printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
#endif
#endif
exit (-1);
}
EOF
- $CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
- dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
- SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
+ $CC_FOR_BUILD -o $dummy $dummy.c &&
+ dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+ SYSTEM_NAME=`$dummy $dummyarg` &&
{ echo "$SYSTEM_NAME"; exit; }
- echo mips-mips-riscos"$UNAME_RELEASE"
+ echo mips-mips-riscos${UNAME_RELEASE}
exit ;;
Motorola:PowerMAX_OS:*:*)
echo powerpc-motorola-powermax
@@ -534,17 +527,17 @@ EOF
AViiON:dgux:*:*)
# DG/UX returns AViiON for all architectures
UNAME_PROCESSOR=`/usr/bin/uname -p`
- if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ]
+ if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
then
- if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \
- [ "$TARGET_BINARY_INTERFACE"x = x ]
+ if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+ [ ${TARGET_BINARY_INTERFACE}x = x ]
then
- echo m88k-dg-dgux"$UNAME_RELEASE"
+ echo m88k-dg-dgux${UNAME_RELEASE}
else
- echo m88k-dg-dguxbcs"$UNAME_RELEASE"
+ echo m88k-dg-dguxbcs${UNAME_RELEASE}
fi
else
- echo i586-dg-dgux"$UNAME_RELEASE"
+ echo i586-dg-dgux${UNAME_RELEASE}
fi
exit ;;
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
@@ -561,7 +554,7 @@ EOF
echo m68k-tektronix-bsd
exit ;;
*:IRIX*:*:*)
- echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`"
+ echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
exit ;;
????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
@@ -573,14 +566,14 @@ EOF
if [ -x /usr/bin/oslevel ] ; then
IBM_REV=`/usr/bin/oslevel`
else
- IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
fi
- echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
+ echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
exit ;;
*:AIX:2:3)
if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
- eval "$set_cc_for_build"
- sed 's/^ //' << EOF > "$dummy.c"
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
#include <sys/systemcfg.h>
main()
@@ -591,7 +584,7 @@ EOF
exit(0);
}
EOF
- if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
+ if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
then
echo "$SYSTEM_NAME"
else
@@ -605,7 +598,7 @@ EOF
exit ;;
*:AIX:*:[4567])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
- if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
+ if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000
else
IBM_ARCH=powerpc
@@ -614,18 +607,18 @@ EOF
IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
else
- IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
fi
- echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
+ echo ${IBM_ARCH}-ibm-aix${IBM_REV}
exit ;;
*:AIX:*:*)
echo rs6000-ibm-aix
exit ;;
- ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
+ ibmrt:4.4BSD:*|romp-ibm:BSD:*)
echo romp-ibm-bsd4.4
exit ;;
ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
- echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to
+ echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
exit ;; # report: romp-ibm BSD 4.3
*:BOSX:*:*)
echo rs6000-bull-bosx
@@ -640,28 +633,28 @@ EOF
echo m68k-hp-bsd4.4
exit ;;
9000/[34678]??:HP-UX:*:*)
- HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
- case "$UNAME_MACHINE" in
- 9000/31?) HP_ARCH=m68000 ;;
- 9000/[34]??) HP_ARCH=m68k ;;
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ case "${UNAME_MACHINE}" in
+ 9000/31? ) HP_ARCH=m68000 ;;
+ 9000/[34]?? ) HP_ARCH=m68k ;;
9000/[678][0-9][0-9])
if [ -x /usr/bin/getconf ]; then
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
- case "$sc_cpu_version" in
+ case "${sc_cpu_version}" in
523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
532) # CPU_PA_RISC2_0
- case "$sc_kernel_bits" in
+ case "${sc_kernel_bits}" in
32) HP_ARCH=hppa2.0n ;;
64) HP_ARCH=hppa2.0w ;;
'') HP_ARCH=hppa2.0 ;; # HP-UX 10.20
esac ;;
esac
fi
- if [ "$HP_ARCH" = "" ]; then
- eval "$set_cc_for_build"
- sed 's/^ //' << EOF > "$dummy.c"
+ if [ "${HP_ARCH}" = "" ]; then
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
#define _HPUX_SOURCE
#include <stdlib.h>
@@ -694,13 +687,13 @@ EOF
exit (0);
}
EOF
- (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
+ (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
test -z "$HP_ARCH" && HP_ARCH=hppa
fi ;;
esac
- if [ "$HP_ARCH" = hppa2.0w ]
+ if [ ${HP_ARCH} = hppa2.0w ]
then
- eval "$set_cc_for_build"
+ eval $set_cc_for_build
# hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
# 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
@@ -719,15 +712,15 @@ EOF
HP_ARCH=hppa64
fi
fi
- echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
+ echo ${HP_ARCH}-hp-hpux${HPUX_REV}
exit ;;
ia64:HP-UX:*:*)
- HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'`
- echo ia64-hp-hpux"$HPUX_REV"
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ echo ia64-hp-hpux${HPUX_REV}
exit ;;
3050*:HI-UX:*:*)
- eval "$set_cc_for_build"
- sed 's/^ //' << EOF > "$dummy.c"
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
#include <unistd.h>
int
main ()
@@ -752,11 +745,11 @@ EOF
exit (0);
}
EOF
- $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
+ $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
{ echo "$SYSTEM_NAME"; exit; }
echo unknown-hitachi-hiuxwe2
exit ;;
- 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
echo hppa1.1-hp-bsd
exit ;;
9000/8??:4.3bsd:*:*)
@@ -765,7 +758,7 @@ EOF
*9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
echo hppa1.0-hp-mpeix
exit ;;
- hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
echo hppa1.1-hp-osf
exit ;;
hp8??:OSF1:*:*)
@@ -773,9 +766,9 @@ EOF
exit ;;
i*86:OSF1:*:*)
if [ -x /usr/sbin/sysversion ] ; then
- echo "$UNAME_MACHINE"-unknown-osf1mk
+ echo ${UNAME_MACHINE}-unknown-osf1mk
else
- echo "$UNAME_MACHINE"-unknown-osf1
+ echo ${UNAME_MACHINE}-unknown-osf1
fi
exit ;;
parisc*:Lites*:*:*)
@@ -800,109 +793,128 @@ EOF
echo c4-convex-bsd
exit ;;
CRAY*Y-MP:*:*:*)
- echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+ echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
CRAY*[A-Z]90:*:*:*)
- echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
+ echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
-e 's/\.[^.]*$/.X/'
exit ;;
CRAY*TS:*:*:*)
- echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+ echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
CRAY*T3E:*:*:*)
- echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+ echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
CRAY*SV1:*:*:*)
- echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+ echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
*:UNICOS/mp:*:*)
- echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
+ echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
- FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
5000:UNIX_System_V:4.*:*)
FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
- FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
- echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
exit ;;
sparc*:BSD/OS:*:*)
- echo sparc-unknown-bsdi"$UNAME_RELEASE"
+ echo sparc-unknown-bsdi${UNAME_RELEASE}
exit ;;
*:BSD/OS:*:*)
- echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
exit ;;
*:FreeBSD:*:*)
UNAME_PROCESSOR=`/usr/bin/uname -p`
- case "$UNAME_PROCESSOR" in
+ case ${UNAME_PROCESSOR} in
amd64)
UNAME_PROCESSOR=x86_64 ;;
i386)
UNAME_PROCESSOR=i586 ;;
esac
- echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+ echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
exit ;;
i*:CYGWIN*:*)
- echo "$UNAME_MACHINE"-pc-cygwin
+ echo ${UNAME_MACHINE}-pc-cygwin
exit ;;
*:MINGW64*:*)
- echo "$UNAME_MACHINE"-pc-mingw64
+ echo ${UNAME_MACHINE}-pc-mingw64
exit ;;
*:MINGW*:*)
- echo "$UNAME_MACHINE"-pc-mingw32
+ echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
*:MSYS*:*)
- echo "$UNAME_MACHINE"-pc-msys
+ echo ${UNAME_MACHINE}-pc-msys
+ exit ;;
+ i*:windows32*:*)
+ # uname -m includes "-pc" on this system.
+ echo ${UNAME_MACHINE}-mingw32
exit ;;
i*:PW*:*)
- echo "$UNAME_MACHINE"-pc-pw32
+ echo ${UNAME_MACHINE}-pc-pw32
exit ;;
*:Interix*:*)
- case "$UNAME_MACHINE" in
+ case ${UNAME_MACHINE} in
x86)
- echo i586-pc-interix"$UNAME_RELEASE"
+ echo i586-pc-interix${UNAME_RELEASE}
exit ;;
authenticamd | genuineintel | EM64T)
- echo x86_64-unknown-interix"$UNAME_RELEASE"
+ echo x86_64-unknown-interix${UNAME_RELEASE}
exit ;;
IA64)
- echo ia64-unknown-interix"$UNAME_RELEASE"
+ echo ia64-unknown-interix${UNAME_RELEASE}
exit ;;
esac ;;
+ [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+ echo i${UNAME_MACHINE}-pc-mks
+ exit ;;
+ 8664:Windows_NT:*)
+ echo x86_64-pc-mks
+ exit ;;
+ i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+ # UNAME_MACHINE based on the output of uname instead of i386?
+ echo i586-pc-interix
+ exit ;;
i*:UWIN*:*)
- echo "$UNAME_MACHINE"-pc-uwin
+ echo ${UNAME_MACHINE}-pc-uwin
exit ;;
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
echo x86_64-unknown-cygwin
exit ;;
+ p*:CYGWIN*:*)
+ echo powerpcle-unknown-cygwin
+ exit ;;
prep*:SunOS:5.*:*)
- echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
+ echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
*:GNU:*:*)
# the GNU system
- echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`"
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
exit ;;
*:GNU/*:*:*)
# other systems with GNU libc and userland
- echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
exit ;;
i*86:Minix:*:*)
- echo "$UNAME_MACHINE"-pc-minix
+ echo ${UNAME_MACHINE}-pc-minix
exit ;;
aarch64:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
aarch64_be:Linux:*:*)
UNAME_MACHINE=aarch64_be
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
@@ -916,63 +928,63 @@ EOF
esac
objdump --private-headers /bin/sh | grep -q ld.so.1
if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
arc:Linux:*:* | arceb:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
arm*:Linux:*:*)
- eval "$set_cc_for_build"
+ eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_EABI__
then
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
else
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_PCS_VFP
then
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
else
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
fi
fi
exit ;;
avr32*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
cris:Linux:*:*)
- echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
exit ;;
crisv32:Linux:*:*)
- echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-axis-linux-${LIBC}
exit ;;
e2k:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
frv:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
hexagon:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
i*86:Linux:*:*)
- echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-pc-linux-${LIBC}
exit ;;
ia64:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
k1om:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
m32r*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
m68*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
mips:Linux:*:* | mips64:Linux:*:*)
- eval "$set_cc_for_build"
- sed 's/^ //' << EOF > "$dummy.c"
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
#undef CPU
#undef ${UNAME_MACHINE}
#undef ${UNAME_MACHINE}el
@@ -986,70 +998,70 @@ EOF
#endif
#endif
EOF
- eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
- test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
+ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
mips64el:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
openrisc*:Linux:*:*)
- echo or1k-unknown-linux-"$LIBC"
+ echo or1k-unknown-linux-${LIBC}
exit ;;
or32:Linux:*:* | or1k*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
padre:Linux:*:*)
- echo sparc-unknown-linux-"$LIBC"
+ echo sparc-unknown-linux-${LIBC}
exit ;;
parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-"$LIBC"
+ echo hppa64-unknown-linux-${LIBC}
exit ;;
parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level
case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
- PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
- PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
- *) echo hppa-unknown-linux-"$LIBC" ;;
+ PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
+ PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
+ *) echo hppa-unknown-linux-${LIBC} ;;
esac
exit ;;
ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-"$LIBC"
+ echo powerpc64-unknown-linux-${LIBC}
exit ;;
ppc:Linux:*:*)
- echo powerpc-unknown-linux-"$LIBC"
+ echo powerpc-unknown-linux-${LIBC}
exit ;;
ppc64le:Linux:*:*)
- echo powerpc64le-unknown-linux-"$LIBC"
+ echo powerpc64le-unknown-linux-${LIBC}
exit ;;
ppcle:Linux:*:*)
- echo powerpcle-unknown-linux-"$LIBC"
+ echo powerpcle-unknown-linux-${LIBC}
exit ;;
riscv32:Linux:*:* | riscv64:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
- echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
exit ;;
sh64*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
sh*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
sparc:Linux:*:* | sparc64:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
tile*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
vax:Linux:*:*)
- echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-dec-linux-${LIBC}
exit ;;
x86_64:Linux:*:*)
- echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-pc-linux-${LIBC}
exit ;;
xtensa*:Linux:*:*)
- echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -1063,34 +1075,34 @@ EOF
# I am not positive that other SVR4 systems won't match this,
# I just have to hope. -- rms.
# Use sysv4.2uw... so that sysv4* matches it.
- echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
exit ;;
i*86:OS/2:*:*)
# If we were able to find `uname', then EMX Unix compatibility
# is probably installed.
- echo "$UNAME_MACHINE"-pc-os2-emx
+ echo ${UNAME_MACHINE}-pc-os2-emx
exit ;;
i*86:XTS-300:*:STOP)
- echo "$UNAME_MACHINE"-unknown-stop
+ echo ${UNAME_MACHINE}-unknown-stop
exit ;;
i*86:atheos:*:*)
- echo "$UNAME_MACHINE"-unknown-atheos
+ echo ${UNAME_MACHINE}-unknown-atheos
exit ;;
i*86:syllable:*:*)
- echo "$UNAME_MACHINE"-pc-syllable
+ echo ${UNAME_MACHINE}-pc-syllable
exit ;;
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
- echo i386-unknown-lynxos"$UNAME_RELEASE"
+ echo i386-unknown-lynxos${UNAME_RELEASE}
exit ;;
i*86:*DOS:*:*)
- echo "$UNAME_MACHINE"-pc-msdosdjgpp
+ echo ${UNAME_MACHINE}-pc-msdosdjgpp
exit ;;
- i*86:*:4.*:*)
- UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
+ i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+ UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
- echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
+ echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
else
- echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
+ echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
fi
exit ;;
i*86:*:5:[678]*)
@@ -1100,12 +1112,12 @@ EOF
*Pentium) UNAME_MACHINE=i586 ;;
*Pent*|*Celeron) UNAME_MACHINE=i686 ;;
esac
- echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}{$UNAME_VERSION}"
+ echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
exit ;;
i*86:*:3.2:*)
if test -f /usr/options/cb.name; then
UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
- echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
+ echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
elif /bin/uname -X 2>/dev/null >/dev/null ; then
UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
@@ -1115,9 +1127,9 @@ EOF
&& UNAME_MACHINE=i686
(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
&& UNAME_MACHINE=i686
- echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
+ echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
else
- echo "$UNAME_MACHINE"-pc-sysv32
+ echo ${UNAME_MACHINE}-pc-sysv32
fi
exit ;;
pc:*:*:*)
@@ -1137,9 +1149,9 @@ EOF
exit ;;
i860:*:4.*:*) # i860-SVR4
if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
- echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
+ echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
else # Add other i860-SVR4 vendors below as they are discovered.
- echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
fi
exit ;;
mini*:CTIX:SYS*5:*)
@@ -1159,9 +1171,9 @@ EOF
test -r /etc/.relid \
&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
&& { echo i486-ncr-sysv4; exit; } ;;
@@ -1170,28 +1182,28 @@ EOF
test -r /etc/.relid \
&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
+ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
- && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
+ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
- echo m68k-unknown-lynxos"$UNAME_RELEASE"
+ echo m68k-unknown-lynxos${UNAME_RELEASE}
exit ;;
mc68030:UNIX_System_V:4.*:*)
echo m68k-atari-sysv4
exit ;;
TSUNAMI:LynxOS:2.*:*)
- echo sparc-unknown-lynxos"$UNAME_RELEASE"
+ echo sparc-unknown-lynxos${UNAME_RELEASE}
exit ;;
rs6000:LynxOS:2.*:*)
- echo rs6000-unknown-lynxos"$UNAME_RELEASE"
+ echo rs6000-unknown-lynxos${UNAME_RELEASE}
exit ;;
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
- echo powerpc-unknown-lynxos"$UNAME_RELEASE"
+ echo powerpc-unknown-lynxos${UNAME_RELEASE}
exit ;;
SM[BE]S:UNIX_SV:*:*)
- echo mips-dde-sysv"$UNAME_RELEASE"
+ echo mips-dde-sysv${UNAME_RELEASE}
exit ;;
RM*:ReliantUNIX-*:*:*)
echo mips-sni-sysv4
@@ -1202,7 +1214,7 @@ EOF
*:SINIX-*:*:*)
if uname -p 2>/dev/null >/dev/null ; then
UNAME_MACHINE=`(uname -p) 2>/dev/null`
- echo "$UNAME_MACHINE"-sni-sysv4
+ echo ${UNAME_MACHINE}-sni-sysv4
else
echo ns32k-sni-sysv
fi
@@ -1222,23 +1234,23 @@ EOF
exit ;;
i*86:VOS:*:*)
# From Paul.Green@stratus.com.
- echo "$UNAME_MACHINE"-stratus-vos
+ echo ${UNAME_MACHINE}-stratus-vos
exit ;;
*:VOS:*:*)
# From Paul.Green@stratus.com.
echo hppa1.1-stratus-vos
exit ;;
mc68*:A/UX:*:*)
- echo m68k-apple-aux"$UNAME_RELEASE"
+ echo m68k-apple-aux${UNAME_RELEASE}
exit ;;
news*:NEWS-OS:6*:*)
echo mips-sony-newsos6
exit ;;
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
if [ -d /usr/nec ]; then
- echo mips-nec-sysv"$UNAME_RELEASE"
+ echo mips-nec-sysv${UNAME_RELEASE}
else
- echo mips-unknown-sysv"$UNAME_RELEASE"
+ echo mips-unknown-sysv${UNAME_RELEASE}
fi
exit ;;
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
@@ -1257,39 +1269,39 @@ EOF
echo x86_64-unknown-haiku
exit ;;
SX-4:SUPER-UX:*:*)
- echo sx4-nec-superux"$UNAME_RELEASE"
+ echo sx4-nec-superux${UNAME_RELEASE}
exit ;;
SX-5:SUPER-UX:*:*)
- echo sx5-nec-superux"$UNAME_RELEASE"
+ echo sx5-nec-superux${UNAME_RELEASE}
exit ;;
SX-6:SUPER-UX:*:*)
- echo sx6-nec-superux"$UNAME_RELEASE"
+ echo sx6-nec-superux${UNAME_RELEASE}
exit ;;
SX-7:SUPER-UX:*:*)
- echo sx7-nec-superux"$UNAME_RELEASE"
+ echo sx7-nec-superux${UNAME_RELEASE}
exit ;;
SX-8:SUPER-UX:*:*)
- echo sx8-nec-superux"$UNAME_RELEASE"
+ echo sx8-nec-superux${UNAME_RELEASE}
exit ;;
SX-8R:SUPER-UX:*:*)
- echo sx8r-nec-superux"$UNAME_RELEASE"
+ echo sx8r-nec-superux${UNAME_RELEASE}
exit ;;
SX-ACE:SUPER-UX:*:*)
- echo sxace-nec-superux"$UNAME_RELEASE"
+ echo sxace-nec-superux${UNAME_RELEASE}
exit ;;
Power*:Rhapsody:*:*)
- echo powerpc-apple-rhapsody"$UNAME_RELEASE"
+ echo powerpc-apple-rhapsody${UNAME_RELEASE}
exit ;;
*:Rhapsody:*:*)
- echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
exit ;;
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
- eval "$set_cc_for_build"
+ eval $set_cc_for_build
if test "$UNAME_PROCESSOR" = unknown ; then
UNAME_PROCESSOR=powerpc
fi
- if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
+ if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
@@ -1317,7 +1329,7 @@ EOF
# that Apple uses in portable devices.
UNAME_PROCESSOR=x86_64
fi
- echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
+ echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
exit ;;
*:procnto*:*:* | *:QNX:[0123456789]*:*)
UNAME_PROCESSOR=`uname -p`
@@ -1325,25 +1337,22 @@ EOF
UNAME_PROCESSOR=i386
UNAME_MACHINE=pc
fi
- echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
+ echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
exit ;;
*:QNX:*:4*)
echo i386-pc-qnx
exit ;;
NEO-*:NONSTOP_KERNEL:*:*)
- echo neo-tandem-nsk"$UNAME_RELEASE"
+ echo neo-tandem-nsk${UNAME_RELEASE}
exit ;;
NSE-*:NONSTOP_KERNEL:*:*)
- echo nse-tandem-nsk"$UNAME_RELEASE"
+ echo nse-tandem-nsk${UNAME_RELEASE}
exit ;;
NSR-*:NONSTOP_KERNEL:*:*)
- echo nsr-tandem-nsk"$UNAME_RELEASE"
- exit ;;
- NSV-*:NONSTOP_KERNEL:*:*)
- echo nsv-tandem-nsk"$UNAME_RELEASE"
+ echo nsr-tandem-nsk${UNAME_RELEASE}
exit ;;
NSX-*:NONSTOP_KERNEL:*:*)
- echo nsx-tandem-nsk"$UNAME_RELEASE"
+ echo nsx-tandem-nsk${UNAME_RELEASE}
exit ;;
*:NonStop-UX:*:*)
echo mips-compaq-nonstopux
@@ -1352,7 +1361,7 @@ EOF
echo bs2000-siemens-sysv
exit ;;
DS/*:UNIX_System_V:*:*)
- echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
+ echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
exit ;;
*:Plan9:*:*)
# "uname -m" is not consistent, so use $cputype instead. 386
@@ -1363,7 +1372,7 @@ EOF
else
UNAME_MACHINE="$cputype"
fi
- echo "$UNAME_MACHINE"-unknown-plan9
+ echo ${UNAME_MACHINE}-unknown-plan9
exit ;;
*:TOPS-10:*:*)
echo pdp10-unknown-tops10
@@ -1384,14 +1393,14 @@ EOF
echo pdp10-unknown-its
exit ;;
SEI:*:*:SEIUX)
- echo mips-sei-seiux"$UNAME_RELEASE"
+ echo mips-sei-seiux${UNAME_RELEASE}
exit ;;
*:DragonFly:*:*)
- echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`"
+ echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
exit ;;
*:*VMS:*:*)
UNAME_MACHINE=`(uname -p) 2>/dev/null`
- case "$UNAME_MACHINE" in
+ case "${UNAME_MACHINE}" in
A*) echo alpha-dec-vms ; exit ;;
I*) echo ia64-dec-vms ; exit ;;
V*) echo vax-dec-vms ; exit ;;
@@ -1400,44 +1409,32 @@ EOF
echo i386-pc-xenix
exit ;;
i*86:skyos:*:*)
- echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`"
+ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'`
exit ;;
i*86:rdos:*:*)
- echo "$UNAME_MACHINE"-pc-rdos
+ echo ${UNAME_MACHINE}-pc-rdos
exit ;;
i*86:AROS:*:*)
- echo "$UNAME_MACHINE"-pc-aros
+ echo ${UNAME_MACHINE}-pc-aros
exit ;;
x86_64:VMkernel:*:*)
- echo "$UNAME_MACHINE"-unknown-esx
+ echo ${UNAME_MACHINE}-unknown-esx
exit ;;
amd64:Isilon\ OneFS:*:*)
echo x86_64-unknown-onefs
exit ;;
esac
-echo "$0: unable to guess system type" >&2
-
-case "$UNAME_MACHINE:$UNAME_SYSTEM" in
- mips:Linux | mips64:Linux)
- # If we got here on MIPS GNU/Linux, output extra information.
- cat >&2 <<EOF
-
-NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
-the system type. Please install a C compiler and try again.
-EOF
- ;;
-esac
-
cat >&2 <<EOF
+$0: unable to guess system type
This script (version $timestamp), has failed to recognize the
-operating system you are using. If your script is old, overwrite *all*
-copies of config.guess and config.sub with the latest versions from:
+operating system you are using. If your script is old, overwrite
+config.guess and config.sub with the latest versions from:
- https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
and
- https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
If $0 has already been updated, send the following data and any
information you think might be pertinent to config-patches@gnu.org to
@@ -1460,16 +1457,16 @@ hostinfo = `(hostinfo) 2>/dev/null`
/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null`
/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
-UNAME_MACHINE = "$UNAME_MACHINE"
-UNAME_RELEASE = "$UNAME_RELEASE"
-UNAME_SYSTEM = "$UNAME_SYSTEM"
-UNAME_VERSION = "$UNAME_VERSION"
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
EOF
exit 1
# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
diff --git a/dist2/config.sub b/dist2/config.sub
index 9ccf09a7..40ea5dfe 100755
--- a/dist2/config.sub
+++ b/dist2/config.sub
@@ -1,8 +1,8 @@
#! /bin/sh
# Configuration validation subroutine script.
-# Copyright 1992-2018 Free Software Foundation, Inc.
+# Copyright 1992-2017 Free Software Foundation, Inc.
-timestamp='2018-03-08'
+timestamp='2017-04-02'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -15,7 +15,7 @@ timestamp='2018-03-08'
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -33,7 +33,7 @@ timestamp='2018-03-08'
# Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
@@ -57,7 +57,7 @@ Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
Canonicalize a configuration name.
-Options:
+Operation modes:
-h, --help print this help, then exit
-t, --time-stamp print date of last modification, then exit
-v, --version print version number, then exit
@@ -67,7 +67,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright 1992-2018 Free Software Foundation, Inc.
+Copyright 1992-2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -94,7 +94,7 @@ while test $# -gt 0 ; do
*local*)
# First pass through any local machine types.
- echo "$1"
+ echo $1
exit ;;
* )
@@ -112,7 +112,7 @@ esac
# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
@@ -120,16 +120,16 @@ case $maybe_os in
kopensolaris*-gnu* | cloudabi*-eabi* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
- basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;;
android-linux)
os=-linux-android
- basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
;;
*)
- basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
- if [ "$basic_machine" != "$1" ]
- then os=`echo "$1" | sed 's/.*-/-/'`
+ basic_machine=`echo $1 | sed 's/-[^-]*$//'`
+ if [ $basic_machine != $1 ]
+ then os=`echo $1 | sed 's/.*-/-/'`
else os=; fi
;;
esac
@@ -178,44 +178,44 @@ case $os in
;;
-sco6)
os=-sco5v6
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco5)
os=-sco3.2v5
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco4)
os=-sco3.2v4
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco3.2.[4-9]*)
os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco3.2v[4-9]*)
# Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco5v6*)
# Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco*)
os=-sco3.2v2
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-udk*)
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-isc)
os=-isc2.2
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-clix*)
basic_machine=clipper-intergraph
;;
-isc*)
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-lynx*178)
os=-lynxos178
@@ -227,7 +227,10 @@ case $os in
os=-lynxos
;;
-ptx*)
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+ ;;
+ -windowsnt*)
+ os=`echo $os | sed -e 's/windowsnt/winnt/'`
;;
-psos*)
os=-psos
@@ -296,7 +299,7 @@ case $basic_machine in
| nios | nios2 | nios2eb | nios2el \
| ns16k | ns32k \
| open8 | or1k | or1knd | or32 \
- | pdp10 | pj | pjl \
+ | pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pru \
| pyramid \
@@ -313,6 +316,7 @@ case $basic_machine in
| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
| visium \
| wasm32 \
+ | we32k \
| x86 | xc16x | xstormy16 | xtensa \
| z8k | z80)
basic_machine=$basic_machine-unknown
@@ -333,7 +337,7 @@ case $basic_machine in
basic_machine=$basic_machine-unknown
os=-none
;;
- m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65)
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
;;
ms1)
basic_machine=mt-unknown
@@ -362,7 +366,7 @@ case $basic_machine in
;;
# Object if more than one company name word.
*-*-*)
- echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
exit 1
;;
# Recognize the basic CPU types with company name.
@@ -457,7 +461,7 @@ case $basic_machine in
# Recognize the various machine names and aliases which stand
# for a CPU type and a company and sometimes even an OS.
386bsd)
- basic_machine=i386-pc
+ basic_machine=i386-unknown
os=-bsd
;;
3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
@@ -491,7 +495,7 @@ case $basic_machine in
basic_machine=x86_64-pc
;;
amd64-*)
- basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
amdahl)
basic_machine=580-amdahl
@@ -536,7 +540,7 @@ case $basic_machine in
os=-linux
;;
blackfin-*)
- basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
bluegene*)
@@ -544,13 +548,13 @@ case $basic_machine in
os=-cnk
;;
c54x-*)
- basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c55x-*)
- basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c6x-*)
- basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c90)
basic_machine=c90-cray
@@ -639,7 +643,7 @@ case $basic_machine in
basic_machine=rs6000-bull
os=-bosx
;;
- dpx2*)
+ dpx2* | dpx2*-bull)
basic_machine=m68k-bull
os=-sysv3
;;
@@ -648,7 +652,7 @@ case $basic_machine in
os=$os"spe"
;;
e500v[12]-*)
- basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
os=$os"spe"
;;
ebmon29k)
@@ -740,6 +744,9 @@ case $basic_machine in
hp9k8[0-9][0-9] | hp8[0-9][0-9])
basic_machine=hppa1.0-hp
;;
+ hppa-next)
+ os=-nextstep3
+ ;;
hppaosf)
basic_machine=hppa1.1-hp
os=-osf
@@ -752,26 +759,26 @@ case $basic_machine in
basic_machine=i370-ibm
;;
i*86v32)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv32
;;
i*86v4*)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv4
;;
i*86v)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv
;;
i*86sol2)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-solaris2
;;
i386mach)
basic_machine=i386-mach
os=-mach
;;
- vsta)
+ i386-vsta | vsta)
basic_machine=i386-unknown
os=-vsta
;;
@@ -790,16 +797,19 @@ case $basic_machine in
os=-sysv
;;
leon-*|leon[3-9]-*)
- basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'`
+ basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'`
;;
m68knommu)
basic_machine=m68k-unknown
os=-linux
;;
m68knommu-*)
- basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
+ m88k-omron*)
+ basic_machine=m88k-omron
+ ;;
magnum | m3230)
basic_machine=mips-mips
os=-sysv
@@ -831,10 +841,10 @@ case $basic_machine in
os=-mint
;;
mips3*-*)
- basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
;;
mips3*)
- basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
;;
monitor)
basic_machine=m68k-rom68k
@@ -853,7 +863,7 @@ case $basic_machine in
os=-msdos
;;
ms1-*)
- basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'`
+ basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
;;
msys)
basic_machine=i686-pc
@@ -895,7 +905,7 @@ case $basic_machine in
basic_machine=v70-nec
os=-sysv
;;
- next | m*-next)
+ next | m*-next )
basic_machine=m68k-next
case $os in
-nextstep* )
@@ -940,9 +950,6 @@ case $basic_machine in
nsr-tandem)
basic_machine=nsr-tandem
;;
- nsv-tandem)
- basic_machine=nsv-tandem
- ;;
nsx-tandem)
basic_machine=nsx-tandem
;;
@@ -978,7 +985,7 @@ case $basic_machine in
os=-linux
;;
parisc-*)
- basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
pbd)
@@ -994,7 +1001,7 @@ case $basic_machine in
basic_machine=i386-pc
;;
pc98-*)
- basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
pentium | p5 | k5 | k6 | nexgen | viac3)
basic_machine=i586-pc
@@ -1009,16 +1016,16 @@ case $basic_machine in
basic_machine=i786-pc
;;
pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
- basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
pentiumpro-* | p6-* | 6x86-* | athlon-*)
- basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
- basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
pentium4-*)
- basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
pn)
basic_machine=pn-gould
@@ -1028,23 +1035,23 @@ case $basic_machine in
ppc | ppcbe) basic_machine=powerpc-unknown
;;
ppc-* | ppcbe-*)
- basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
ppcle | powerpclittle)
basic_machine=powerpcle-unknown
;;
ppcle-* | powerpclittle-*)
- basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
ppc64) basic_machine=powerpc64-unknown
;;
- ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
ppc64le | powerpc64little)
basic_machine=powerpc64le-unknown
;;
ppc64le-* | powerpc64little-*)
- basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
ps2)
basic_machine=i386-ibm
@@ -1098,10 +1105,17 @@ case $basic_machine in
sequent)
basic_machine=i386-sequent
;;
+ sh)
+ basic_machine=sh-hitachi
+ os=-hms
+ ;;
sh5el)
basic_machine=sh5le-unknown
;;
- simso-wrs)
+ sh64)
+ basic_machine=sh64-unknown
+ ;;
+ sparclite-wrs | simso-wrs)
basic_machine=sparclite-wrs
os=-vxworks
;;
@@ -1120,7 +1134,7 @@ case $basic_machine in
os=-sysv4
;;
strongarm-* | thumb-*)
- basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
sun2)
basic_machine=m68000-sun
@@ -1234,6 +1248,9 @@ case $basic_machine in
basic_machine=a29k-wrs
os=-vxworks
;;
+ wasm32)
+ basic_machine=wasm32-unknown
+ ;;
w65*)
basic_machine=w65-wdc
os=-none
@@ -1242,9 +1259,6 @@ case $basic_machine in
basic_machine=hppa1.1-winbond
os=-proelf
;;
- x64)
- basic_machine=x86_64-pc
- ;;
xbox)
basic_machine=i686-pc
os=-mingw32
@@ -1253,12 +1267,20 @@ case $basic_machine in
basic_machine=xps100-honeywell
;;
xscale-* | xscalee[bl]-*)
- basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'`
+ basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
;;
ymp)
basic_machine=ymp-cray
os=-unicos
;;
+ z8k-*-coff)
+ basic_machine=z8k-unknown
+ os=-sim
+ ;;
+ z80-*-coff)
+ basic_machine=z80-unknown
+ os=-sim
+ ;;
none)
basic_machine=none-none
os=-none
@@ -1287,6 +1309,10 @@ case $basic_machine in
vax)
basic_machine=vax-dec
;;
+ pdp10)
+ # there are many clones, so DEC is not a safe bet
+ basic_machine=pdp10-unknown
+ ;;
pdp11)
basic_machine=pdp11-dec
;;
@@ -1296,6 +1322,9 @@ case $basic_machine in
sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
basic_machine=sh-unknown
;;
+ sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
+ basic_machine=sparc-sun
+ ;;
cydra)
basic_machine=cydra-cydrome
;;
@@ -1315,7 +1344,7 @@ case $basic_machine in
# Make sure to match an already-canonicalized machine name.
;;
*)
- echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
exit 1
;;
esac
@@ -1323,10 +1352,10 @@ esac
# Here we canonicalize certain aliases for manufacturers.
case $basic_machine in
*-digital*)
- basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'`
+ basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
;;
*-commodore*)
- basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'`
+ basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
;;
*)
;;
@@ -1337,8 +1366,8 @@ esac
if [ x"$os" != x"" ]
then
case $os in
- # First match some system type aliases that might get confused
- # with valid system types.
+ # First match some system type aliases
+ # that might get confused with valid system types.
# -solaris* is a basic system type, with this one exception.
-auroraux)
os=-auroraux
@@ -1349,19 +1378,18 @@ case $os in
-solaris)
os=-solaris2
;;
+ -svr4*)
+ os=-sysv4
+ ;;
-unixware*)
os=-sysv4.2uw
;;
-gnu/linux*)
os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
;;
- # es1800 is here to avoid being matched by es* (a different OS)
- -es1800*)
- os=-ose
- ;;
- # Now accept the basic system types.
+ # First accept the basic system types.
# The portable systems comes first.
- # Each alternative MUST end in a * to match a version number.
+ # Each alternative MUST END IN A *, to match a version number.
# -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
@@ -1371,26 +1399,25 @@ case $os in
| -aos* | -aros* | -cloudabi* | -sortix* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \
+ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
| -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
- | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* | -glidix* \
| -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-musl* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
- | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \
+ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
- | -morphos* | -superux* | -rtmk* | -windiss* \
+ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
- | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \
- | -midnightbsd*)
+ | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1407,12 +1434,12 @@ case $os in
-nto*)
os=`echo $os | sed -e 's|nto|nto-qnx|'`
;;
- -sim | -xray | -os68k* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* \
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
| -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
;;
-mac*)
- os=`echo "$os" | sed -e 's|mac|macos|'`
+ os=`echo $os | sed -e 's|mac|macos|'`
;;
-linux-dietlibc)
os=-linux-dietlibc
@@ -1421,10 +1448,10 @@ case $os in
os=`echo $os | sed -e 's|linux|linux-gnu|'`
;;
-sunos5*)
- os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
+ os=`echo $os | sed -e 's|sunos5|solaris2|'`
;;
-sunos6*)
- os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
+ os=`echo $os | sed -e 's|sunos6|solaris3|'`
;;
-opened*)
os=-openedition
@@ -1435,6 +1462,12 @@ case $os in
-wince*)
os=-wince
;;
+ -osfrose*)
+ os=-osfrose
+ ;;
+ -osf*)
+ os=-osf
+ ;;
-utek*)
os=-bsd
;;
@@ -1459,7 +1492,7 @@ case $os in
-nova*)
os=-rtmk-nova
;;
- -ns2)
+ -ns2 )
os=-nextstep2
;;
-nsk*)
@@ -1481,7 +1514,7 @@ case $os in
-oss*)
os=-sysv3
;;
- -svr4*)
+ -svr4)
os=-sysv4
;;
-svr3)
@@ -1496,28 +1529,24 @@ case $os in
-ose*)
os=-ose
;;
+ -es1800*)
+ os=-ose
+ ;;
+ -xenix)
+ os=-xenix
+ ;;
-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
os=-mint
;;
+ -aros*)
+ os=-aros
+ ;;
-zvmoe)
os=-zvmoe
;;
-dicos*)
os=-dicos
;;
- -pikeos*)
- # Until real need of OS specific support for
- # particular features comes up, bare metal
- # configurations are quite functional.
- case $basic_machine in
- arm*)
- os=-eabi
- ;;
- *)
- os=-elf
- ;;
- esac
- ;;
-nacl*)
;;
-ios)
@@ -1527,7 +1556,7 @@ case $os in
*)
# Get rid of the `-' at the beginning of $os.
os=`echo $os | sed 's/[^-]*-//'`
- echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2
+ echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
exit 1
;;
esac
@@ -1623,6 +1652,9 @@ case $basic_machine in
*-be)
os=-beos
;;
+ *-haiku)
+ os=-haiku
+ ;;
*-ibm)
os=-aix
;;
@@ -1662,7 +1694,7 @@ case $basic_machine in
m88k-omron*)
os=-luna
;;
- *-next)
+ *-next )
os=-nextstep
;;
*-sequent)
@@ -1677,6 +1709,9 @@ case $basic_machine in
i370-*)
os=-mvs
;;
+ *-next)
+ os=-nextstep3
+ ;;
*-gould)
os=-sysv
;;
@@ -1786,15 +1821,15 @@ case $basic_machine in
vendor=stratus
;;
esac
- basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"`
+ basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
;;
esac
-echo "$basic_machine$os"
+echo $basic_machine$os
exit
# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
diff --git a/dist2/configure b/dist2/configure
index 1b3b0285..6091d75b 100755
--- a/dist2/configure
+++ b/dist2/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for PCRE2 10.33.
+# Generated by GNU Autoconf 2.69 for PCRE2 10.32.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='PCRE2'
PACKAGE_TARNAME='pcre2'
-PACKAGE_VERSION='10.33'
-PACKAGE_STRING='PCRE2 10.33'
+PACKAGE_VERSION='10.32'
+PACKAGE_STRING='PCRE2 10.32'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -677,6 +677,8 @@ WITH_PCRE2_16_FALSE
WITH_PCRE2_16_TRUE
WITH_PCRE2_8_FALSE
WITH_PCRE2_8_TRUE
+PCRE2_HAVE_INTTYPES_H
+PCRE2_HAVE_STDINT_H
enable_pcre2_32
enable_pcre2_16
enable_pcre2_8
@@ -726,6 +728,7 @@ am__nodep
AMDEPBACKSLASH
AMDEP_FALSE
AMDEP_TRUE
+am__quote
am__include
DEPDIR
OBJEXT
@@ -799,8 +802,7 @@ PACKAGE_VERSION
PACKAGE_TARNAME
PACKAGE_NAME
PATH_SEPARATOR
-SHELL
-am__quote'
+SHELL'
ac_subst_files=''
ac_user_opts='
enable_option_checking
@@ -825,7 +827,6 @@ enable_jit
enable_jit_sealloc
enable_pcre2grep_jit
enable_pcre2grep_callout
-enable_pcre2grep_callout_fork
enable_rebuild_chartables
enable_unicode
enable_newline_is_cr
@@ -854,7 +855,6 @@ enable_valgrind
enable_coverage
enable_fuzz_support
enable_stack_for_recursion
-enable_percent_zt
'
ac_precious_vars='build_alias
host_alias
@@ -1413,7 +1413,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures PCRE2 10.33 to adapt to many kinds of systems.
+\`configure' configures PCRE2 10.32 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1483,7 +1483,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of PCRE2 10.33:";;
+ short | recursive ) echo "Configuration of PCRE2 10.32:";;
esac
cat <<\_ACEOF
@@ -1512,8 +1512,6 @@ Optional Features:
--disable-pcre2grep-jit disable JIT support in pcre2grep
--disable-pcre2grep-callout
disable callout script support in pcre2grep
- --disable-pcre2grep-callout-fork
- disable callout script fork support in pcre2grep
--enable-rebuild-chartables
rebuild character tables in current locale
--disable-unicode disable Unicode support
@@ -1543,7 +1541,6 @@ Optional Features:
--enable-valgrind enable valgrind support
--enable-coverage enable code coverage reports using gcov
--enable-fuzz-support enable fuzzer support
- --disable-percent-zt disable the use of z and t formatting modifiers
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -1662,7 +1659,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-PCRE2 configure 10.33
+PCRE2 configure 10.32
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2157,7 +2154,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by PCRE2 $as_me 10.33, which was
+It was created by PCRE2 $as_me 10.32, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2506,7 +2503,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-am__api_version='1.16'
+am__api_version='1.15'
ac_aux_dir=
for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
@@ -3021,7 +3018,7 @@ fi
# Define the identity of the package.
PACKAGE='pcre2'
- VERSION='10.33'
+ VERSION='10.32'
cat >>confdefs.h <<_ACEOF
@@ -3051,8 +3048,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
# For better backward compatibility. To be removed once Automake 1.9.x
# dies out for good. For more background, see:
-# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
-# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
mkdir_p='$(MKDIR_P)'
# We need awk for the "check" target (and possibly the TAP driver). The
@@ -3103,7 +3100,7 @@ END
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
-that behaves properly: <https://www.gnu.org/software/coreutils/>.
+that behaves properly: <http://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
@@ -4020,45 +4017,45 @@ DEPDIR="${am__leading_dot}deps"
ac_config_commands="$ac_config_commands depfiles"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5
-$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; }
-cat > confinc.mk << 'END'
+
+am_make=${MAKE-make}
+cat > confinc << 'END'
am__doit:
- @echo this is the am__doit target >confinc.out
+ @echo this is the am__doit target
.PHONY: am__doit
END
+# If we don't find an include directive, just comment out the code.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5
+$as_echo_n "checking for style of include used by $am_make... " >&6; }
am__include="#"
am__quote=
-# BSD make does it like this.
-echo '.include "confinc.mk" # ignored' > confmf.BSD
-# Other make implementations (GNU, Solaris 10, AIX) do it like this.
-echo 'include confinc.mk # ignored' > confmf.GNU
-_am_result=no
-for s in GNU BSD; do
- { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5
- (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
- case $?:`cat confinc.out 2>/dev/null` in #(
- '0:this is the am__doit target') :
- case $s in #(
- BSD) :
- am__include='.include' am__quote='"' ;; #(
- *) :
- am__include='include' am__quote='' ;;
-esac ;; #(
- *) :
- ;;
+_am_result=none
+# First try GNU make style include.
+echo "include confinc" > confmf
+# Ignore all kinds of additional output from 'make'.
+case `$am_make -s -f confmf 2> /dev/null` in #(
+*the\ am__doit\ target*)
+ am__include=include
+ am__quote=
+ _am_result=GNU
+ ;;
esac
- if test "$am__include" != "#"; then
- _am_result="yes ($s style)"
- break
- fi
-done
-rm -f confinc.* confmf.*
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5
-$as_echo "${_am_result}" >&6; }
+# Now try BSD make style include.
+if test "$am__include" = "#"; then
+ echo '.include "confinc"' > confmf
+ case `$am_make -s -f confmf 2> /dev/null` in #(
+ *the\ am__doit\ target*)
+ am__include=.include
+ am__quote="\""
+ _am_result=BSD
+ ;;
+ esac
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5
+$as_echo "$_am_result" >&6; }
+rm -f confinc confmf
# Check whether --enable-dependency-tracking was given.
if test "${enable_dependency_tracking+set}" = set; then :
@@ -5249,8 +5246,8 @@ esac
-macro_version='2.4.6.42-b88ce'
-macro_revision='2.4.6.42'
+macro_version='2.4.6.40-6ca5-dirty'
+macro_revision='2.4.6.40'
@@ -7176,7 +7173,7 @@ for ac_symprfx in "" "_"; do
if test "$lt_cv_nm_interface" = "MS dumpbin"; then
# Fake it for dumpbin and say T for any non-static function,
# D for any global variable and I for any imported variable.
- # Also find C++ and __fastcall symbols from MSVC++ or ICC,
+ # Also find C++ and __fastcall symbols from MSVC++,
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK '"\
" {last_section=section; section=\$ 3};"\
@@ -8840,8 +8837,8 @@ esac
ofile=libtool
can_build_shared=yes
-# All known linkers require a '.a' archive for static linking (except MSVC and
-# ICC, which need '.lib').
+# All known linkers require a '.a' archive for static linking (except MSVC,
+# which needs '.lib').
libext=a
with_gnu_ld=$lt_cv_prog_gnu_ld
@@ -9768,15 +9765,15 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
case $host_os in
cygwin* | mingw* | pw32* | cegcc*)
- # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time
+ # FIXME: the MSVC++ port hasn't been tested in a loooong time
# When not using gcc, we currently assume that we are using
- # Microsoft Visual C++ or Intel C++ Compiler.
+ # Microsoft Visual C++.
if test yes != "$GCC"; then
with_gnu_ld=no
fi
;;
interix*)
- # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC)
+ # we just hope/assume this is gcc and not c89 (= MSVC++)
with_gnu_ld=yes
;;
openbsd* | bitrig*)
@@ -10425,12 +10422,12 @@ fi
cygwin* | mingw* | pw32* | cegcc*)
# When not using gcc, we currently assume that we are using
- # Microsoft Visual C++ or Intel C++ Compiler.
+ # Microsoft Visual C++.
# hardcode_libdir_flag_spec is actually meaningless, as there is
# no search path for DLLs.
case $cc_basename in
- cl* | icl*)
- # Native MSVC or ICC
+ cl*)
+ # Native MSVC
hardcode_libdir_flag_spec=' '
allow_undefined_flag=unsupported
always_export_symbols=yes
@@ -10471,7 +10468,7 @@ fi
fi'
;;
*)
- # Assume MSVC and ICC wrapper
+ # Assume MSVC wrapper
hardcode_libdir_flag_spec=' '
allow_undefined_flag=unsupported
# Tell ltmain to make .lib files, not .a files.
@@ -11503,8 +11500,8 @@ cygwin* | mingw* | pw32* | cegcc*)
dynamic_linker='Win32 ld.exe'
;;
- *,cl* | *,icl*)
- # Native MSVC or ICC
+ *,cl*)
+ # Native MSVC
libname_spec='$name'
soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext'
library_names_spec='$libname.dll.lib'
@@ -11560,7 +11557,7 @@ cygwin* | mingw* | pw32* | cegcc*)
;;
*)
- # Assume MSVC and ICC wrapper
+ # Assume MSVC wrapper
library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib'
dynamic_linker='Win32 ld.exe'
;;
@@ -12981,9 +12978,9 @@ _ACEOF
# Versioning
PCRE2_MAJOR="10"
-PCRE2_MINOR="33"
+PCRE2_MINOR="32"
PCRE2_PRERELEASE=""
-PCRE2_DATE="2019-04-16"
+PCRE2_DATE="2018-09-10"
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
then
@@ -13064,7 +13061,7 @@ fi
-# Handle --enable-debug (disabled by default)
+# Handle --enable-debug (disabled by default)
# Check whether --enable-debug was given.
if test "${enable_debug+set}" = set; then :
enableval=$enable_debug;
@@ -13090,8 +13087,6 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
- SAVE_CPPFLAGS=$CPPFLAGS
- CPPFLAGS=-I$srcdir
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -13107,7 +13102,6 @@ else
enable_jit=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- CPPFLAGS=$SAVE_CPPFLAGS
echo checking for JIT support on this hardware... $enable_jit
fi
@@ -13138,15 +13132,6 @@ else
fi
-# Handle --disable-pcre2grep-callout-fork (enabled by default)
-# Check whether --enable-pcre2grep-callout-fork was given.
-if test "${enable_pcre2grep_callout_fork+set}" = set; then :
- enableval=$enable_pcre2grep_callout_fork;
-else
- enable_pcre2grep_callout_fork=yes
-fi
-
-
# Handle --enable-rebuild-chartables
# Check whether --enable-rebuild-chartables was given.
if test "${enable_rebuild_chartables+set}" = set; then :
@@ -13401,15 +13386,6 @@ fi
# [don't use stack recursion when matching]),
# , enable_stack_for_recursion=yes)
-# Handle --disable-percent_zt (set as "auto" by default)
-# Check whether --enable-percent-zt was given.
-if test "${enable_percent_zt+set}" = set; then :
- enableval=$enable_percent_zt;
-else
- enable_percent_zt=auto
-fi
-
-
# Set the default value for pcre2-8
if test "x$enable_pcre2_8" = "xunset"
then
@@ -13637,6 +13613,37 @@ fi
done
+for ac_header in stdint.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "stdint.h" "ac_cv_header_stdint_h" "$ac_includes_default"
+if test "x$ac_cv_header_stdint_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_STDINT_H 1
+_ACEOF
+ PCRE2_HAVE_STDINT_H=1
+else
+ PCRE2_HAVE_STDINT_H=0
+fi
+
+done
+
+for ac_header in inttypes.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "inttypes.h" "ac_cv_header_inttypes_h" "$ac_includes_default"
+if test "x$ac_cv_header_inttypes_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_INTTYPES_H 1
+_ACEOF
+ PCRE2_HAVE_INTTYPES_H=1
+else
+ PCRE2_HAVE_INTTYPES_H=0
+fi
+
+done
+
+
+
+
# Conditional compilation
if test "x$enable_pcre2_8" = "xyes"; then
WITH_PCRE2_8_TRUE=
@@ -14351,14 +14358,6 @@ $as_echo "#define PCRE2_DEBUG /**/" >>confdefs.h
fi
-if test "$enable_percent_zt" = "no"; then
-
-$as_echo "#define DISABLE_PERCENT_ZT /**/" >>confdefs.h
-
-else
- enable_percent_zt=auto
-fi
-
# Unless running under Windows, JIT support requires pthreads.
if test "$enable_jit" = "yes"; then
@@ -14783,21 +14782,14 @@ $as_echo "#define SUPPORT_PCRE2GREP_JIT /**/" >>confdefs.h
fi
if test "$enable_pcre2grep_callout" = "yes"; then
- if test "$enable_pcre2grep_callout_fork" = "yes"; then
- if test "$HAVE_WINDOWS_H" != "1"; then
- if test "$HAVE_SYS_WAIT_H" != "1"; then
- as_fn_error $? "Callout script support needs sys/wait.h." "$LINENO" 5
- fi
+ if test "$HAVE_WINDOWS_H" != "1"; then
+ if test "$HAVE_SYS_WAIT_H" != "1"; then
+ as_fn_error $? "Callout script support needs sys/wait.h." "$LINENO" 5
fi
-
-$as_echo "#define SUPPORT_PCRE2GREP_CALLOUT_FORK /**/" >>confdefs.h
-
fi
$as_echo "#define SUPPORT_PCRE2GREP_CALLOUT /**/" >>confdefs.h
-else
- enable_pcre2grep_callout_fork="no"
fi
if test "$enable_unicode" = "yes"; then
@@ -14972,16 +14964,16 @@ esac
# are m4 variables, assigned above.
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
- $NO_UNDEFINED -version-info 8:0:8"
+ $NO_UNDEFINED -version-info 7:1:7"
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
- $NO_UNDEFINED -version-info 8:0:8"
+ $NO_UNDEFINED -version-info 7:1:7"
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
- $NO_UNDEFINED -version-info 8:0:8"
+ $NO_UNDEFINED -version-info 7:1:7"
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
- $NO_UNDEFINED -version-info 2:2:0"
+ $NO_UNDEFINED -version-info 2:1:0"
@@ -16024,7 +16016,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by PCRE2 $as_me 10.33, which was
+This file was extended by PCRE2 $as_me 10.32, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -16090,7 +16082,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-PCRE2 config.status 10.33
+PCRE2 config.status 10.32
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -16209,7 +16201,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
#
# INIT-COMMANDS
#
-AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"
+AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"
# The HP-UX ksh and POSIX shell print the target directory to stdout
@@ -17115,35 +17107,29 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
# Older Autoconf quotes --file arguments for eval, but not when files
# are listed without --file. Let's play safe and only enable the eval
# if we detect the quoting.
- # TODO: see whether this extra hack can be removed once we start
- # requiring Autoconf 2.70 or later.
- case $CONFIG_FILES in #(
- *\'*) :
- eval set x "$CONFIG_FILES" ;; #(
- *) :
- set x $CONFIG_FILES ;; #(
- *) :
- ;;
-esac
+ case $CONFIG_FILES in
+ *\'*) eval set x "$CONFIG_FILES" ;;
+ *) set x $CONFIG_FILES ;;
+ esac
shift
- # Used to flag and report bootstrapping failures.
- am_rc=0
- for am_mf
+ for mf
do
# Strip MF so we end up with the name of the file.
- am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile which includes
- # dependency-tracking related rules and includes.
- # Grep'ing the whole file directly is not great: AIX grep has a line
+ mf=`echo "$mf" | sed -e 's/:.*$//'`
+ # Check whether this is an Automake generated Makefile or not.
+ # We used to match only the files named 'Makefile.in', but
+ # some people rename them; so instead we look at the file content.
+ # Grep'ing the first line is not enough: some people post-process
+ # each Makefile.in and add a new line on top of each file to say so.
+ # Grep'ing the whole file is not good either: AIX grep has a line
# limit of 2048, but all sed's we know have understand at least 4000.
- sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
- || continue
- am_dirpart=`$as_dirname -- "$am_mf" ||
-$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$am_mf" : 'X\(//\)[^/]' \| \
- X"$am_mf" : 'X\(//\)$' \| \
- X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$am_mf" |
+ if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
+ dirpart=`$as_dirname -- "$mf" ||
+$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$mf" : 'X\(//\)[^/]' \| \
+ X"$mf" : 'X\(//\)$' \| \
+ X"$mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$mf" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
@@ -17161,48 +17147,53 @@ $as_echo X"$am_mf" |
q
}
s/.*/./; q'`
- am_filepart=`$as_basename -- "$am_mf" ||
-$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \
- X"$am_mf" : 'X\(//\)$' \| \
- X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X/"$am_mf" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{
+ else
+ continue
+ fi
+ # Extract the definition of DEPDIR, am__include, and am__quote
+ # from the Makefile without running 'make'.
+ DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
+ test -z "$DEPDIR" && continue
+ am__include=`sed -n 's/^am__include = //p' < "$mf"`
+ test -z "$am__include" && continue
+ am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
+ # Find all dependency output files, they are included files with
+ # $(DEPDIR) in their names. We invoke sed twice because it is the
+ # simplest approach to changing $(DEPDIR) to its actual value in the
+ # expansion.
+ for file in `sed -n "
+ s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
+ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
+ # Make sure the directory exists.
+ test -f "$dirpart/$file" && continue
+ fdir=`$as_dirname -- "$file" ||
+$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$file" : 'X\(//\)[^/]' \| \
+ X"$file" : 'X\(//\)$' \| \
+ X"$file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
}
- /^X\/\(\/\/\)$/{
+ /^X\(\/\/\)[^/].*/{
s//\1/
q
}
- /^X\/\(\/\).*/{
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
s//\1/
q
}
s/.*/./; q'`
- { echo "$as_me:$LINENO: cd "$am_dirpart" \
- && sed -e '/# am--include-marker/d' "$am_filepart" \
- | $MAKE -f - am--depfiles" >&5
- (cd "$am_dirpart" \
- && sed -e '/# am--include-marker/d' "$am_filepart" \
- | $MAKE -f - am--depfiles) >&5 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } || am_rc=$?
+ as_dir=$dirpart/$fdir; as_fn_mkdir_p
+ # echo "creating $dirpart/$file"
+ echo '# dummy' > "$dirpart/$file"
+ done
done
- if test $am_rc -ne 0; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "Something went wrong bootstrapping makefile fragments
- for automatic dependency tracking. Try re-running configure with the
- '--disable-dependency-tracking' option to at least be able to build
- the package (albeit without support for automatic dependency tracking).
-See \`config.log' for more details" "$LINENO" 5; }
- fi
- { am_dirpart=; unset am_dirpart;}
- { am_filepart=; unset am_filepart;}
- { am_mf=; unset am_mf;}
- { am_rc=; unset am_rc;}
- rm -f conftest-deps.mk
}
;;
"libtool":C)
@@ -17849,7 +17840,6 @@ $PACKAGE-$VERSION configuration summary:
Build static libs .................. : ${enable_static}
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
- Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
@@ -17859,7 +17849,6 @@ $PACKAGE-$VERSION configuration summary:
Valgrind support ................... : ${enable_valgrind}
Code coverage ...................... : ${enable_coverage}
Fuzzer support ..................... : ${enable_fuzz_support}
- Use %zu and %td .................... : ${enable_percent_zt}
EOF
diff --git a/dist2/configure.ac b/dist2/configure.ac
index 93c2b531..c43ae380 100644
--- a/dist2/configure.ac
+++ b/dist2/configure.ac
@@ -9,18 +9,18 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [33])
+m4_define(pcre2_minor, [32])
m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2019-04-16])
+m4_define(pcre2_date, [2018-09-10])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
# Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version, [8:0:8])
-m4_define(libpcre2_16_version, [8:0:8])
-m4_define(libpcre2_32_version, [8:0:8])
-m4_define(libpcre2_posix_version, [2:2:0])
+m4_define(libpcre2_8_version, [7:1:7])
+m4_define(libpcre2_16_version, [7:1:7])
+m4_define(libpcre2_32_version, [7:1:7])
+m4_define(libpcre2_posix_version, [2:1:0])
AC_PREREQ(2.57)
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
@@ -131,7 +131,7 @@ AC_ARG_ENABLE(pcre2-32,
, enable_pcre2_32=unset)
AC_SUBST(enable_pcre2_32)
-# Handle --enable-debug (disabled by default)
+# Handle --enable-debug (disabled by default)
AC_ARG_ENABLE(debug,
AS_HELP_STRING([--enable-debug],
[enable debugging code]),
@@ -146,15 +146,12 @@ AC_ARG_ENABLE(jit,
# This code enables JIT if the hardware supports it.
if test "$enable_jit" = "auto"; then
AC_LANG(C)
- SAVE_CPPFLAGS=$CPPFLAGS
- CPPFLAGS=-I$srcdir
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#define SLJIT_CONFIG_AUTO 1
#include "src/sljit/sljitConfigInternal.h"
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
#error unsupported
#endif]])], enable_jit=yes, enable_jit=no)
- CPPFLAGS=$SAVE_CPPFLAGS
echo checking for JIT support on this hardware... $enable_jit
fi
@@ -176,12 +173,6 @@ AC_ARG_ENABLE(pcre2grep-callout,
[disable callout script support in pcre2grep]),
, enable_pcre2grep_callout=yes)
-# Handle --disable-pcre2grep-callout-fork (enabled by default)
-AC_ARG_ENABLE(pcre2grep-callout-fork,
- AS_HELP_STRING([--disable-pcre2grep-callout-fork],
- [disable callout script fork support in pcre2grep]),
- , enable_pcre2grep_callout_fork=yes)
-
# Handle --enable-rebuild-chartables
AC_ARG_ENABLE(rebuild-chartables,
AS_HELP_STRING([--enable-rebuild-chartables],
@@ -352,12 +343,6 @@ AC_ARG_ENABLE(stack-for-recursion,,
# [don't use stack recursion when matching]),
# , enable_stack_for_recursion=yes)
-# Handle --disable-percent_zt (set as "auto" by default)
-AC_ARG_ENABLE(percent-zt,
- AS_HELP_STRING([--disable-percent-zt],
- [disable the use of z and t formatting modifiers]),
- , enable_percent_zt=auto)
-
# Set the default value for pcre2-8
if test "x$enable_pcre2_8" = "xunset"
then
@@ -466,6 +451,11 @@ AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
+AC_CHECK_HEADERS([stdint.h], [PCRE2_HAVE_STDINT_H=1], [PCRE2_HAVE_STDINT_H=0])
+AC_CHECK_HEADERS([inttypes.h], [PCRE2_HAVE_INTTYPES_H=1], [PCRE2_HAVE_INTTYPES_H=0])
+AC_SUBST([PCRE2_HAVE_STDINT_H])
+AC_SUBST([PCRE2_HAVE_INTTYPES_H])
+
# Conditional compilation
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
@@ -602,14 +592,6 @@ if test "$enable_debug" = "yes"; then
Define to any value to include debugging code.])
fi
-if test "$enable_percent_zt" = "no"; then
- AC_DEFINE([DISABLE_PERCENT_ZT], [], [
- Define to any value to disable the use of the z and t modifiers in
- formatting settings such as %zu or %td (this is rarely needed).])
-else
- enable_percent_zt=auto
-fi
-
# Unless running under Windows, JIT support requires pthreads.
if test "$enable_jit" = "yes"; then
@@ -639,21 +621,13 @@ if test "$enable_pcre2grep_jit" = "yes"; then
fi
if test "$enable_pcre2grep_callout" = "yes"; then
- if test "$enable_pcre2grep_callout_fork" = "yes"; then
- if test "$HAVE_WINDOWS_H" != "1"; then
- if test "$HAVE_SYS_WAIT_H" != "1"; then
- AC_MSG_ERROR([Callout script support needs sys/wait.h.])
- fi
+ if test "$HAVE_WINDOWS_H" != "1"; then
+ if test "$HAVE_SYS_WAIT_H" != "1"; then
+ AC_MSG_ERROR([Callout script support needs sys/wait.h.])
fi
- AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
- Define to any value to enable fork support in pcre2grep callout scripts.
- This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
- defined.])
fi
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
Define to any value to enable callout script support in pcre2grep.])
-else
- enable_pcre2grep_callout_fork="no"
fi
if test "$enable_unicode" = "yes"; then
@@ -1055,7 +1029,6 @@ $PACKAGE-$VERSION configuration summary:
Build static libs .................. : ${enable_static}
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
- Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
@@ -1065,7 +1038,6 @@ $PACKAGE-$VERSION configuration summary:
Valgrind support ................... : ${enable_valgrind}
Code coverage ...................... : ${enable_coverage}
Fuzzer support ..................... : ${enable_fuzz_support}
- Use %zu and %td .................... : ${enable_percent_zt}
EOF
diff --git a/dist2/depcomp b/dist2/depcomp
index 65cbf709..b39f98f9 100755
--- a/dist2/depcomp
+++ b/dist2/depcomp
@@ -1,9 +1,9 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
-scriptversion=2018-03-07.03; # UTC
+scriptversion=2016-01-11.22; # UTC
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,7 +16,7 @@ scriptversion=2018-03-07.03; # UTC
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -783,7 +783,7 @@ exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
diff --git a/dist2/doc/html/NON-AUTOTOOLS-BUILD.txt b/dist2/doc/html/NON-AUTOTOOLS-BUILD.txt
index 39e7620a..b742ed34 100644
--- a/dist2/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/dist2/doc/html/NON-AUTOTOOLS-BUILD.txt
@@ -48,7 +48,7 @@ can skip ahead to the CMake section.
macro to specify what character(s) you want to be interpreted as line
terminators by default.
- When you subsequently compile any of the PCRE2 modules, you must specify
+ When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
@@ -62,11 +62,6 @@ can skip ahead to the CMake section.
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
- Note also that the src/config.h.generic file is created from a config.h
- that was generated by Autotools, which automatically includes settings of
- a number of macros that are not actually used by PCRE2 (for example,
- HAVE_MEMORY_H).
-
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
@@ -109,7 +104,6 @@ can skip ahead to the CMake section.
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
- pcre2_script_run.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
@@ -401,6 +395,6 @@ Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx languages.
-==============================
-Last Updated: 14 November 2018
-==============================
+===========================
+Last Updated: 19 April 2018
+===========================
diff --git a/dist2/doc/html/README.txt b/dist2/doc/html/README.txt
index ff9a6af6..2eb621b0 100644
--- a/dist2/doc/html/README.txt
+++ b/dist2/doc/html/README.txt
@@ -1,11 +1,9 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
-PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
-API. Since its initial release in 2015, there has been further development of
-the code and it now differs from PCRE1 in more than just the API. There are new
-features and the internals have been improved. The latest release of PCRE2 is
-always available in three alternative formats from:
+PCRE2 is a re-working of the original PCRE library to provide an entirely new
+API. The latest release of PCRE2 is always available in three alternative
+formats from:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
@@ -41,13 +39,13 @@ The PCRE2 APIs
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
-32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
-are no C++ wrappers.
+32-bit library, which processes strings of 32-bit values. There are no C++
+wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
-man page). These are built into a library called libpcre2-posix. Note that this
-just provides a POSIX calling interface to PCRE2; the regular expressions
+man page). These can be found in a library called libpcre2-posix. Note that
+this just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
@@ -55,8 +53,20 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
-renamed or pointed at by a link (or the program modified, of course). See the
-pcre2posix documentation for more details.
+renamed or pointed at by a link.
+
+If you are using the POSIX interface to PCRE2 and there is already a POSIX
+regex library installed on your system, as well as worrying about the regex.h
+header file (as mentioned above), you must also take care when linking programs
+to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they
+may pick up the POSIX functions of the same name from the other library.
+
+One way of avoiding this confusion is to compile PCRE2 with the addition of
+-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
+compiler flags (CFLAGS if you are using "configure" -- see below). This has the
+effect of renaming the functions so that the names no longer clash. Of course,
+you have to do the same thing for your applications, or write them using the
+new names.
Documentation for PCRE2
@@ -313,11 +323,7 @@ library. They are also documented in the pcre2build man page.
. There is support for calling external programs during matching in the
pcre2grep command, using PCRE2's callout facility with string arguments. This
support can be disabled by adding --disable-pcre2grep-callout to the
- "configure" command. There are two kinds of callout: one that generates
- output from inbuilt code, and another that calls an external program. The
- latter has special support for Windows and VMS; otherwise it assumes the
- existence of the fork() function. This facility can be disabled by adding
- --disable-pcre2grep-callout-fork to the "configure" command.
+ "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
@@ -370,15 +376,6 @@ library. They are also documented in the pcre2build man page.
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
should fix it.
-. The C99 standard defines formatting modifiers z and t for size_t and
- ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
- environments other than Microsoft Visual Studio when __STDC_VERSION__ is
- defined and has a value greater than or equal to 199901L (indicating C99).
- However, there is at least one environment that claims to be C99 but does not
- support these modifiers. If --disable-percent-zt is specified, no use is made
- of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
- size_t values.
-
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
@@ -792,7 +789,6 @@ The distribution should contain the files listed below.
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
- src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
@@ -892,4 +888,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 16 April 2019
+Last updated: 17 June 2018
diff --git a/dist2/doc/html/pcre2.html b/dist2/doc/html/pcre2.html
index dc2d058b..bc588c4c 100644
--- a/dist2/doc/html/pcre2.html
+++ b/dist2/doc/html/pcre2.html
@@ -162,7 +162,7 @@ listing), and the short pages for individual functions, are concatenated in
pcre2-config show PCRE2 installation configuration information
pcre2api details of PCRE2's native C API
pcre2build building PCRE2
- pcre2callout details of the pattern callout feature
+ pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
@@ -198,7 +198,7 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
</P>
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 17 September 2018
+Last updated: 11 July 2018
<br>
Copyright &copy; 1997-2018 University of Cambridge.
<br>
diff --git a/dist2/doc/html/pcre2_compile.html b/dist2/doc/html/pcre2_compile.html
index 23f75e10..d109eebb 100644
--- a/dist2/doc/html/pcre2_compile.html
+++ b/dist2/doc/html/pcre2_compile.html
@@ -86,12 +86,7 @@ PCRE2 must be built with Unicode support (the default) in order to use
PCRE2_UTF, PCRE2_UCP and related options.
</P>
<P>
-Additional options may be set in the compile context via the
-<a href="pcre2_set_compile_extra_options.html"><b>pcre2_set_compile_extra_options</b></a>
-function.
-</P>
-<P>
-The yield of this function is a pointer to a private data structure that
+The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected.
</P>
<P>
diff --git a/dist2/doc/html/pcre2_dfa_match.html b/dist2/doc/html/pcre2_dfa_match.html
index 232e2bce..8702cca3 100644
--- a/dist2/doc/html/pcre2_dfa_match.html
+++ b/dist2/doc/html/pcre2_dfa_match.html
@@ -51,8 +51,6 @@ depth limits. The <i>length</i> and <i>startoffset</i> values are code units, no
characters. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
- PCRE2_COPY_MATCHED_SUBJECT
- On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
diff --git a/dist2/doc/html/pcre2_jit_compile.html b/dist2/doc/html/pcre2_jit_compile.html
index bbaa82c2..15b0138b 100644
--- a/dist2/doc/html/pcre2_jit_compile.html
+++ b/dist2/doc/html/pcre2_jit_compile.html
@@ -40,7 +40,6 @@ bits:
PCRE2_JIT_COMPLETE compile code for full matching
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
- PCRE2_JIT_INVALID_UTF compile code to handle invalid UTF
</pre>
The yield of the function is 0 for success, or a negative error code otherwise.
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
diff --git a/dist2/doc/html/pcre2_match.html b/dist2/doc/html/pcre2_match.html
index 90f7fcc1..ced70bb2 100644
--- a/dist2/doc/html/pcre2_match.html
+++ b/dist2/doc/html/pcre2_match.html
@@ -55,13 +55,11 @@ A match context is needed only if you want to:
Change the backtracking depth limit
Set custom memory management specifically for the match
</pre>
-The <i>length</i> and <i>startoffset</i> values are code units, not characters.
-The length may be given as PCRE2_ZERO_TERMINATED for a subject that is
-terminated by a binary zero code unit. The options are:
+The <i>length</i> and <i>startoffset</i> values are code
+units, not characters. The length may be given as PCRE2_ZERO_TERMINATED for a
+subject that is terminated by a binary zero code unit. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
- PCRE2_COPY_MATCHED_SUBJECT
- On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
diff --git a/dist2/doc/html/pcre2_match_data_free.html b/dist2/doc/html/pcre2_match_data_free.html
index 6ba6162d..68a44611 100644
--- a/dist2/doc/html/pcre2_match_data_free.html
+++ b/dist2/doc/html/pcre2_match_data_free.html
@@ -31,11 +31,6 @@ using the memory freeing function from the general context or compiled pattern
with which it was created, or <b>free()</b> if that was not set.
</P>
<P>
-If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
-match data block, the copy of the subject that was remembered with the block is
-also freed.
-</P>
-<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
diff --git a/dist2/doc/html/pcre2_pattern_info.html b/dist2/doc/html/pcre2_pattern_info.html
index eaaac6c0..2e357094 100644
--- a/dist2/doc/html/pcre2_pattern_info.html
+++ b/dist2/doc/html/pcre2_pattern_info.html
@@ -19,8 +19,7 @@ SYNOPSIS
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
-<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>,</b>
-<b> void *<i>where</i>);</b>
+<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
diff --git a/dist2/doc/html/pcre2_set_compile_extra_options.html b/dist2/doc/html/pcre2_set_compile_extra_options.html
index c6c11f7e..73749313 100644
--- a/dist2/doc/html/pcre2_set_compile_extra_options.html
+++ b/dist2/doc/html/pcre2_set_compile_extra_options.html
@@ -20,7 +20,7 @@ SYNOPSIS
</P>
<P>
<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
-<b> uint32_t <i>extra_options</i>);</b>
+<b> uint32_t <i>extra_options</i>);</b>
</P>
<br><b>
DESCRIPTION
@@ -31,9 +31,7 @@ housed in a compile context. It completely replaces all the bits. The extra
options are:
<pre>
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
- PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character
- PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
PCRE2_EXTRA_MATCH_WORD Pattern matches "words"
</pre>
diff --git a/dist2/doc/html/pcre2_set_substitute_callout.html b/dist2/doc/html/pcre2_set_substitute_callout.html
deleted file mode 100644
index 7ae3a398..00000000
--- a/dist2/doc/html/pcre2_set_substitute_callout.html
+++ /dev/null
@@ -1,43 +0,0 @@
-<html>
-<head>
-<title>pcre2_set_substitute_callout specification</title>
-</head>
-<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
-<h1>pcre2_set_substitute_callout man page</h1>
-<p>
-Return to the <a href="index.html">PCRE2 index page</a>.
-</p>
-<p>
-This page is part of the PCRE2 HTML documentation. It was generated
-automatically from the original man page. If there is any nonsense in it,
-please consult the man page, in case the conversion went wrong.
-<br>
-<br><b>
-SYNOPSIS
-</b><br>
-<P>
-<b>#include &#60;pcre2.h&#62;</b>
-</P>
-<P>
-<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
-<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *),</b>
-<b> void *<i>callout_data</i>);</b>
-</P>
-<br><b>
-DESCRIPTION
-</b><br>
-<P>
-This function sets the substitute callout fields in a match context (the first
-argument). The second argument specifies a callout function, and the third
-argument is an opaque data item that is passed to it. The result of this
-function is always zero.
-</P>
-<P>
-There is a complete description of the PCRE2 native API in the
-<a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
-page.
-<p>
-Return to the <a href="index.html">PCRE2 index page</a>.
-</p>
diff --git a/dist2/doc/html/pcre2_substring_nametable_scan.html b/dist2/doc/html/pcre2_substring_nametable_scan.html
index 277affae..5a44ea9c 100644
--- a/dist2/doc/html/pcre2_substring_nametable_scan.html
+++ b/dist2/doc/html/pcre2_substring_nametable_scan.html
@@ -27,8 +27,8 @@ DESCRIPTION
</b><br>
<P>
This convenience function finds, for a compiled pattern, the first and last
-entries for a given name in the table that translates capture group names into
-numbers.
+entries for a given name in the table that translates capturing parenthesis
+names into numbers.
<pre>
<i>code</i> Compiled regular expression
<i>name</i> Name whose entries required
diff --git a/dist2/doc/html/pcre2api.html b/dist2/doc/html/pcre2api.html
index 7ca39f51..17f9794d 100644
--- a/dist2/doc/html/pcre2api.html
+++ b/dist2/doc/html/pcre2api.html
@@ -49,7 +49,7 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC34" href="#SEC34">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
<li><a name="TOC35" href="#SEC35">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
<li><a name="TOC36" href="#SEC36">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
-<li><a name="TOC37" href="#SEC37">DUPLICATE CAPTURE GROUP NAMES</a>
+<li><a name="TOC37" href="#SEC37">DUPLICATE SUBPATTERN NAMES</a>
<li><a name="TOC38" href="#SEC38">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
<li><a name="TOC39" href="#SEC39">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
<li><a name="TOC40" href="#SEC40">SEE ALSO</a>
@@ -182,11 +182,6 @@ document for an overview of all the PCRE2 documentation.
<b> void *<i>callout_data</i>);</b>
<br>
<br>
-<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
-<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
-<b> void *<i>callout_data</i>);</b>
-<br>
-<br>
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE <i>value</i>);</b>
<br>
@@ -312,8 +307,7 @@ document for an overview of all the PCRE2 documentation.
<b>const unsigned char *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
<br>
<br>
-<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>,</b>
-<b> void *<i>where</i>);</b>
+<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
<br>
<br>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
@@ -853,7 +847,7 @@ functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
<b> uint32_t <i>value</i>);</b>
<br>
<br>
-This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
+This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
using up too much system stack when being compiled. The limit applies to
parentheses of all kinds, not just capturing parentheses.
@@ -918,23 +912,12 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
<b> void *<i>callout_data</i>);</b>
<br>
<br>
-This sets up a callout function for PCRE2 to call at specified points
+This sets up a "callout" function for PCRE2 to call at specified points
during a matching operation. Details are given in the
<a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation.
<br>
<br>
-<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
-<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
-<b> void *<i>callout_data</i>);</b>
-<br>
-<br>
-This sets up a callout function for PCRE2 to call after each substitution
-made by <b>pcre2_substitute()</b>. Details are given in the section entitled
-"Creating a new string with substitutions"
-<a href="#substitutions">below.</a>
-<br>
-<br>
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE <i>value</i>);</b>
<br>
@@ -948,7 +931,7 @@ substitutions.
</P>
<P>
For example, if the pattern /abc/ is matched against "123abc" with an offset
-limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be
+limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be
found if the <i>startoffset</i> argument of <b>pcre2_match()</b>,
<b>pcre2_dfa_match()</b>, or <b>pcre2_substitute()</b> is greater than the offset
limit set in the match context.
@@ -1299,24 +1282,21 @@ are needed. The <b>pcre2_code_copy_with_tables()</b> provides this facility.
Copies of both the code and the tables are made, with the new code pointing to
the new tables. The memory for the new tables is automatically freed when
<b>pcre2_code_free()</b> is called for the new copy of the compiled code. If
-<b>pcre2_code_copy_with_tables()</b> is called with a NULL argument, it returns
+<b>pcre2_code_copy_with_tables()</b> is called with a NULL argument, it returns
NULL.
</P>
<P>
NOTE: When one of the matching functions is called, pointers to the compiled
pattern and the subject string are set in the match data block so that they can
-be referenced by the substring extraction functions after a successful match.
-After running a match, you must not free a compiled pattern or a subject string
-until after all operations on the
+be referenced by the substring extraction functions. After running a match, you
+must not free a compiled pattern (or a subject string) until after all
+operations on the
<a href="#matchdatablock">match data block</a>
-have taken place, unless, in the case of the subject string, you have used the
-PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
-"Option bits for <b>pcre2_match()</b>"
-<a href="#matchoptions">below.</a>
+have taken place.
</P>
<P>
The <i>options</i> argument for <b>pcre2_compile()</b> contains various bit
-settings that affect the compilation. It should be zero if none of them are
+settings that affect the compilation. It should be zero if no options are
required. The available options are described below. Some of them (in
particular, those that are compatible with Perl, but some others as well) can
also be set and unset from within the pattern (see the detailed description in
@@ -1331,9 +1311,8 @@ compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK
options can be set at the time of matching as well as at compile time.
</P>
<P>
-Some additional options and less frequently required compile-time parameters
-(for example, the newline setting) can be provided in a compile context (as
-described
+Other, less frequently required compile-time parameters (for example, the
+newline setting) can be provided in a compile context (as described
<a href="#compilecontext">above).</a>
</P>
<P>
@@ -1386,13 +1365,7 @@ This code fragment shows a typical straightforward call to
&errorcode, /* for error code */
&erroffset, /* for error offset */
NULL); /* no compile context */
-
-</PRE>
-</P>
-<br><b>
-Main compile options
-</b><br>
-<P>
+</pre>
The following names for option bits are defined in the <b>pcre2.h</b> header
file:
<pre>
@@ -1432,14 +1405,6 @@ hexadecimal digits, in which case the hexadecimal number defines the code point
to match. By default, as in Perl, a hexadecimal number is always expected after
\x, but it may have zero, one, or two digits (so, for example, \xz matches a
binary zero character followed by z).
-</P>
-<P>
-ECMAscript 6 added additional functionality to \u. This can be accessed using
-the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
-<a href="#extracompileoptions">below).</a>
-Note that this alternative escape handling applies only to patterns. Neither of
-these options affects the processing of replacement strings passed to
-<b>pcre2_substitute()</b>.
<pre>
PCRE2_ALT_CIRCUMFLEX
</pre>
@@ -1506,10 +1471,10 @@ independent of the setting of PCRE2_DOTALL.
<pre>
PCRE2_DUPNAMES
</pre>
-If this bit is set, names used to identify capture groups need not be unique.
-This can be helpful for certain types of pattern when it is known that only one
-instance of the named group can ever be matched. There are more details of
-named capture groups below; see also the
+If this bit is set, names used to identify capturing subpatterns need not be
+unique. This can be helpful for certain types of pattern when it is known that
+only one instance of the named subpattern can ever be matched. There are more
+details of named subpatterns below; see also the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation.
<pre>
@@ -1542,11 +1507,11 @@ the end of the subject.
If this bit is set, most white space characters in the pattern are totally
ignored except when escaped or inside a character class. However, white space
is not allowed within sequences such as (?&#62; that introduce various
-parenthesized groups, nor within numerical quantifiers such as {1,3}. Ignorable
-white space is permitted between an item and a following quantifier and between
-a quantifier and a following + that indicates possessiveness. PCRE2_EXTENDED is
-equivalent to Perl's /x option, and it can be changed within a pattern by a
-(?x) option setting.
+parenthesized subpatterns, nor within numerical quantifiers such as {1,3}.
+Ignorable white space is permitted between an item and a following quantifier
+and between a quantifier and a following + that indicates possessiveness.
+PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be changed within
+a pattern by a (?x) option setting.
</P>
<P>
When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
@@ -1622,7 +1587,7 @@ error.
<pre>
PCRE2_MATCH_UNSET_BACKREF
</pre>
-If this option is set, a backreference to an unset capture group matches an
+If this option is set, a backreference to an unset capturing group matches an
empty string (by default this causes the current matching alternative to fail).
A pattern such as (\1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl
@@ -1684,7 +1649,7 @@ If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
they acquire numbers in the usual way). This is the same as Perl's /n option.
-Note that, when this option is set, references to capture groups
+Note that, when this option is set, references to capturing groups
(backreferences or recursion/subroutine calls) may only refer to named groups,
though the reference can be by name or by number.
<pre>
@@ -1703,7 +1668,7 @@ purposes.
If this option is set, it disables an optimization that is applied when .* is
the first significant item in a top-level branch of a pattern, and all the
other branches also start with .* or with \A or \G or ^. The optimization is
-automatically disabled for .* if it is inside an atomic group or a capture
+automatically disabled for .* if it is inside an atomic group or a capturing
group that is the subject of a backreference, or if the pattern contains
(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is
automatically anchored if PCRE2_DOTALL is set for all the .* items and
@@ -1846,8 +1811,9 @@ characters with code points greater than 127.
Extra compile options
</b><br>
<P>
-The option bits that can be set in a compile context by calling the
-<b>pcre2_set_compile_extra_options()</b> function are as follows:
+Unlike the main compile-time options, the extra options are not saved with the
+compiled pattern. The option bits that can be set in a compile context by
+calling the <b>pcre2_set_compile_extra_options()</b> function are as follows:
<pre>
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
</pre>
@@ -1873,14 +1839,6 @@ point values in UTF-8 and UTF-32 patterns no longer provoke errors and are
incorporated in the compiled pattern. However, they can only match subject
characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
<pre>
- PCRE2_EXTRA_ALT_BSUX
-</pre>
-The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in
-the way that ECMAscript (aka JavaScript) does. Additional functionality was
-defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
-PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal
-character code, where hhh.. is any number of hexadecimal digits.
-<pre>
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
</pre>
This is a dangerous option. Use with care. By default, an unrecognized escape
@@ -1893,22 +1851,11 @@ always causes an error in Perl.
</P>
<P>
If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
-<b>pcre2_compile()</b>, all unrecognized or malformed escape sequences are
+<b>pcre2_compile()</b>, all unrecognized or erroneous escape sequences are
treated as single-character escapes. For example, \j is a literal "j" and
\x{2z} is treated as the literal string "x{2z}". Setting this option means
-that typos in patterns may go undetected and have unexpected results. Also note
-that a sequence such as [\N{] is interpreted as a malformed attempt at
-[\N{...}] and so is treated as [N{] whereas [\N] gives an error because an
-unqualified \N is a valid escape sequence but is not supported in a character
-class. To reiterate: this is a dangerous option. Use with great care.
-<pre>
- PCRE2_EXTRA_ESCAPED_CR_IS_LF
-</pre>
-There are some legacy applications where the escape sequence \r in a pattern
-is expected to match a newline. If this option is set, \r in a pattern is
-converted to \n so that it matches a LF (linefeed) instead of a CR (carriage
-return) character. The option does not affect a literal CR in the pattern, nor
-does it affect CR specified as an explicit code point such as \x{0D}.
+that typos in patterns may go undetected and have unexpected results. This is a
+dangerous option. Use with care.
<pre>
PCRE2_EXTRA_MATCH_LINE
</pre>
@@ -2089,7 +2036,7 @@ When .* is the first significant item, anchoring is possible only when all the
following are true:
<pre>
.* is not in an atomic group
- .* is not in a capture group that is the subject of a backreference
+ .* is not in a capturing group that is the subject of a backreference
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
PCRE2_NO_DOTSTAR_ANCHOR is not set
@@ -2100,12 +2047,12 @@ options returned for PCRE2_INFO_ALLOPTIONS.
PCRE2_INFO_BACKREFMAX
</pre>
Return the number of the highest backreference in the pattern. The third
-argument should point to an <b>uint32_t</b> variable. Named capture groups
-acquire numbers as well as names, and these count towards the highest
-backreference. Backreferences such as \4 or \g{12} match the captured
-characters of the given group, but in addition, the check that a capture
-group is set in a conditional group such as (?(3)a|b) is also a backreference.
-Zero is returned if there are no backreferences.
+argument should point to an <b>uint32_t</b> variable. Named subpatterns acquire
+numbers as well as names, and these count towards the highest backreference.
+Backreferences such as \4 or \g{12} match the captured characters of the
+given group, but in addition, the check that a capturing group is set in a
+conditional subpattern such as (?(3)a|b) is also a backreference. Zero is
+returned if there are no backreferences.
<pre>
PCRE2_INFO_BSR
</pre>
@@ -2116,9 +2063,9 @@ that \R matches only CR, LF, or CRLF.
<pre>
PCRE2_INFO_CAPTURECOUNT
</pre>
-Return the highest capture group number in the pattern. In patterns where (?|
-is not used, this is also the total number of capture groups. The third
-argument should point to an <b>uint32_t</b> variable.
+Return the highest capturing subpattern number in the pattern. In patterns
+where (?| is not used, this is also the total number of capturing subpatterns.
+The third argument should point to an <b>uint32_t</b> variable.
<pre>
PCRE2_INFO_DEPTHLIMIT
</pre>
@@ -2166,7 +2113,7 @@ Return the size (in bytes) of the data frames that are used to remember
backtracking positions when the pattern is processed by <b>pcre2_match()</b>
without the use of JIT. The third argument should point to a <b>size_t</b>
variable. The frame size depends on the number of capturing parentheses in the
-pattern. Each additional capture group adds two PCRE2_SIZE variables.
+pattern. Each additional capturing group adds two PCRE2_SIZE variables.
<pre>
PCRE2_INFO_HASBACKSLASHC
</pre>
@@ -2290,20 +2237,20 @@ the parenthesis number. The rest of the entry is the corresponding name, zero
terminated.
</P>
<P>
-The names are in alphabetical order. If (?| is used to create multiple capture
-groups with the same number, as described in the
-<a href="pcre2pattern.html#dupgroupnumber">section on duplicate group numbers</a>
+The names are in alphabetical order. If (?| is used to create multiple groups
+with the same number, as described in the
+<a href="pcre2pattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
page, the groups may be given the same name, but there is only one entry in the
table. Different names for groups of the same number are not permitted.
</P>
<P>
-Duplicate names for capture groups with different numbers are permitted, but
-only if PCRE2_DUPNAMES is set. They appear in the table in the order in which
-they were found in the pattern. In the absence of (?| this is the order of
+Duplicate names for subpatterns with different numbers are permitted, but only
+if PCRE2_DUPNAMES is set. They appear in the table in the order in which they
+were found in the pattern. In the absence of (?| this is the order of
increasing number; when (?| is used this is not necessarily the case because
-later capture groups may have lower numbers.
+later subpatterns may have lower numbers.
</P>
<P>
As a simple example of the name/number table, consider the following pattern
@@ -2312,16 +2259,16 @@ space - including newlines - is ignored):
<pre>
(?&#60;date&#62; (?&#60;year&#62;(\d\d)?\d\d) - (?&#60;month&#62;\d\d) - (?&#60;day&#62;\d\d) )
</pre>
-There are four named capture groups, so the table has four entries, and each
-entry in the table is eight bytes long. The table is as follows, with
-non-printing bytes shown in hexadecimal, and undefined bytes shown as ??:
+There are four named subpatterns, so the table has four entries, and each entry
+in the table is eight bytes long. The table is as follows, with non-printing
+bytes shown in hexadecimal, and undefined bytes shown as ??:
<pre>
00 01 d a t e 00 ??
00 05 d a y 00 ?? ??
00 04 m o n t h 00
00 02 y e a r 00 ??
</pre>
-When writing code to extract data from named capture groups using the
+When writing code to extract data from named subpatterns using the
name-to-number map, remember that the length of the entries is likely to be
different for each compiled pattern.
<pre>
@@ -2446,13 +2393,9 @@ on the error, and is detailed below.
<P>
When one of the matching functions is called, pointers to the compiled pattern
and the subject string are set in the match data block so that they can be
-referenced by the extraction functions after a successful match. After running
-a match, you must not free a compiled pattern or a subject string until after
-all operations on the match data block (for that match) have taken place,
-unless, in the case of the subject string, you have used the
-PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
-"Option bits for <b>pcre2_match()</b>"
-<a href="#matchoptions">below.</a>
+referenced by the extraction functions. After running a match, you must not
+free a compiled pattern or a subject string until after all operations on the
+match data block (for that match) have taken place.
</P>
<P>
When a match data block itself is no longer needed, it should be freed by
@@ -2564,10 +2507,10 @@ Option bits for <b>pcre2_match()</b>
</b><br>
<P>
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
-zero. The only bits that may be set are PCRE2_ANCHORED,
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
-PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
-PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
+zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
+Their action is described below.
</P>
<P>
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
@@ -2583,22 +2526,6 @@ to be anchored by virtue of its contents, it cannot be made unachored at
matching time. Note that setting the option at match time disables JIT
matching.
<pre>
- PCRE2_COPY_MATCHED_SUBJECT
-</pre>
-By default, a pointer to the subject is remembered in the match data block so
-that, after a successful match, it can be referenced by the substring
-extraction functions. This means that the subject's memory must not be freed
-until all such operations are complete. For some applications where the
-lifetime of the subject string is not guaranteed, it may be necessary to make a
-copy of the subject string, but it is wasteful to do this unless the match is
-successful. After a successful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the
-subject is copied and the new pointer is remembered in the match data block
-instead of the original subject pointer. The memory allocator that was used for
-the match block itself is used. The copy is automatically freed when
-<b>pcre2_match_data_free()</b> is called to free the match data block. It is also
-automatically freed if the match data block is re-used for another match
-operation.
-<pre>
PCRE2_ENDANCHORED
</pre>
If the PCRE2_ENDANCHORED option is set, any string that <b>pcre2_match()</b>
@@ -2764,12 +2691,12 @@ valid newline sequence and explicit \r or \n escapes appear in the pattern.
In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
-book, this is called "capturing" in what follows, and the phrase "capture
-group" (Perl terminology) is used for a fragment of a pattern that picks out a
-substring. PCRE2 supports several other kinds of parenthesized group that do
-not cause substrings to be captured. The <b>pcre2_pattern_info()</b> function
-can be used to find out how many capture groups there are in a compiled
-pattern.
+book, this is called "capturing" in what follows, and the phrase "capturing
+subpattern" or "capturing group" is used for a fragment of a pattern that picks
+out a substring. PCRE2 supports several other kinds of parenthesized subpattern
+that do not cause substrings to be captured. The <b>pcre2_pattern_info()</b>
+function can be used to find out how many capturing subpatterns there are in a
+compiled pattern.
</P>
<P>
You can use auxiliary functions for accessing captured substrings
@@ -2818,8 +2745,9 @@ For example, if the pattern (?=ab\K) is matched against "ab", the start and
end offset values for the match are 2 and 0.
</P>
<P>
-If a capture group is matched repeatedly within a single match operation, it is
-the last portion of the subject that it matched that is returned.
+If a capturing subpattern is matched repeatedly within a single match
+operation, it is the last portion of the subject that it matched that is
+returned.
</P>
<P>
If the ovector is too small to hold all the captured substring offsets, as much
@@ -2828,20 +2756,21 @@ substrings are not of interest, <b>pcre2_match()</b> may be called with a match
data block whose ovector is of minimum length (that is, one pair).
</P>
<P>
-It is possible for capture group number <i>n+1</i> to match some part of the
-subject when group <i>n</i> has not been used at all. For example, if the string
-"abc" is matched against the pattern (a|(z))(bc) the return from the function
-is 4, and groups 1 and 3 are matched, but 2 is not. When this happens, both
-values in the offset pairs corresponding to unused groups are set to
-PCRE2_UNSET.
+It is possible for capturing subpattern number <i>n+1</i> to match some part of
+the subject when subpattern <i>n</i> has not been used at all. For example, if
+the string "abc" is matched against the pattern (a|(z))(bc) the return from the
+function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
+happens, both values in the offset pairs corresponding to unused subpatterns
+are set to PCRE2_UNSET.
</P>
<P>
-Offset values that correspond to unused groups at the end of the expression are
-also set to PCRE2_UNSET. For example, if the string "abc" is matched against
-the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the
-function is 2, because the highest used capture group number is 1. The offsets
-for the second and third capture groups (assuming the vector is large
-enough, of course) are set to PCRE2_UNSET.
+Offset values that correspond to unused subpatterns at the end of the
+expression are also set to PCRE2_UNSET. For example, if the string "abc" is
+matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched.
+The return from the function is 2, because the highest used capturing
+subpattern number is 1. The offsets for the second and third capturing
+subpatterns (assuming the vector is large enough, of course) are set to
+PCRE2_UNSET.
</P>
<P>
Elements in the ovector that do not correspond to capturing parentheses in the
@@ -2865,23 +2794,22 @@ undefined.
</P>
<P>
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
-to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
-<b>pcre2_get_mark()</b> can be called to access this name, which can be
-specified in the pattern by any of the backtracking control verbs, not just
-(*MARK). The same function applies to all the verbs. It returns a pointer to
-the zero-terminated name, which is within the compiled pattern. If no name is
+to match (PCRE2_ERROR_NOMATCH), a (*MARK), (*PRUNE), or (*THEN) name may be
+available. The function <b>pcre2_get_mark()</b> can be called to access this
+name. The same function applies to all three verbs. It returns a pointer to the
+zero-terminated name, which is within the compiled pattern. If no name is
available, NULL is returned. The length of the name (excluding the terminating
zero) is stored in the code unit that precedes the name. You should use this
length instead of relying on the terminating zero if the name might contain a
binary zero.
</P>
<P>
-After a successful match, the name that is returned is the last mark name
-encountered on the matching path through the pattern. Instances of backtracking
-verbs without names do not count. Thus, for example, if the matching path
-contains (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a
-partial match, the last encountered name is returned. For example, consider
-this pattern:
+After a successful match, the name that is returned is the last (*MARK),
+(*PRUNE), or (*THEN) name encountered on the matching path through the pattern.
+Instances of (*PRUNE) and (*THEN) without names are ignored. Thus, for example,
+if the matching path contains (*MARK:A)(*PRUNE), the name "A" is returned.
+After a "no match" or a partial match, the last encountered name is returned.
+For example, consider this pattern:
<pre>
^(*MARK:A)((*MARK:B)a|b)c
</pre>
@@ -2896,7 +2824,7 @@ is removed from the pattern above, there is an initial check for the presence
of "c" in the subject before running the matching engine. This check fails for
"bx", causing a match failure without seeing any marks. You can disable the
start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for
-<b>pcre2_compile()</b> or by starting the pattern with (*NO_START_OPT).
+<b>pcre2_compile()</b> or starting the pattern with (*NO_START_OPT).
</P>
<P>
After a successful match, a partial match, or one of the invalid UTF errors
@@ -3002,8 +2930,7 @@ The backtracking match limit was reached.
If a pattern contains many nested backtracking points, heap memory is used to
remember them. This error is given when the memory allocation function (default
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+if the amount of memory needed exceeds the heap limit.
<pre>
PCRE2_ERROR_NULL
</pre>
@@ -3014,11 +2941,11 @@ as NULL.
</pre>
This error is returned when <b>pcre2_match()</b> detects a recursion loop within
the pattern. Specifically, it means that either the whole pattern or a
-capture group has been called recursively for the second time at the same
-position in the subject string. Some simple patterns that might do this are
-detected and faulted at compile time, but more complicated cases, in particular
-mutual recursions between two different groups, cannot be detected until
-matching is attempted.
+subpattern has been called recursively for the second time at the same position
+in the subject string. Some simple patterns that might do this are detected and
+faulted at compile time, but more complicated cases, in particular mutual
+recursions between two different subpatterns, cannot be detected until matching
+is attempted.
<a name="geterrormessage"></a></P>
<br><a name="SEC32" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
<P>
@@ -3095,7 +3022,7 @@ The <b>pcre2_substring_copy_bynumber()</b> function copies a captured substring
into a supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it
into new memory, obtained using the same memory allocation function that was
used for the match data block. The first two arguments of these functions are a
-pointer to the match data block and a capture group number.
+pointer to the match data block and a capturing group number.
</P>
<P>
The final arguments of <b>pcre2_substring_copy_bynumber()</b> are a pointer to
@@ -3171,9 +3098,9 @@ calling <b>pcre2_substring_list_free()</b>.
</P>
<P>
If this function encounters a substring that is unset, which can happen when
-capture group number <i>n+1</i> matches some part of the subject, but group
-<i>n</i> has not been used at all, it returns an empty string. This can be
-distinguished from a genuine zero-length substring by inspecting the
+capturing subpattern number <i>n+1</i> matches some part of the subject, but
+subpattern <i>n</i> has not been used at all, it returns an empty string. This
+can be distinguished from a genuine zero-length substring by inspecting the
 appropriate offset in the ovector, which contains PCRE2_UNSET for unset
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
<a name="extractbyname"></a></P>
@@ -3203,21 +3130,21 @@ For example, for this pattern:
<pre>
(a+)b(?&#60;xxx&#62;\d+)...
</pre>
-the number of the capture group called "xxx" is 2. If the name is known to be
+the number of the subpattern called "xxx" is 2. If the name is known to be
unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
compiled pattern, and the second is the name. The yield of the function is the
-group number, PCRE2_ERROR_NOSUBSTRING if there is no group with that name, or
-PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one group with that name.
-Given the number, you can extract the substring directly from the ovector, or
-use one of the "bynumber" functions described above.
+subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
+name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
+that name. Given the number, you can extract the substring directly from the
+ovector, or use one of the "bynumber" functions described above.
</P>
<P>
For convenience, there are also "byname" functions that correspond to the
"bynumber" functions, the only difference being that the second argument is a
name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate
names, these functions scan all the groups with the given name, and return the
-captured substring from the first named group that is set.
+first named string that is set.
</P>
<P>
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
@@ -3228,38 +3155,34 @@ set, PCRE2_ERROR_UNSET is returned.
</P>
<P>
<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
-capture groups with the same number, as described in the
-<a href="pcre2pattern.html#dupgroupnumber">section on duplicate group numbers</a>
+subpatterns with the same number, as described in the
+<a href="pcre2pattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
-page, you cannot use names to distinguish the different capture groups, because
+page, you cannot use names to distinguish the different subpatterns, because
names are not included in the compiled code. The matching process uses only
-numbers. For this reason, the use of different names for groups with the
+numbers. For this reason, the use of different names for subpatterns of the
same number causes an error at compile time.
-<a name="substitutions"></a></P>
+</P>
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
<P>
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
-<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
+<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
</P>
<P>
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
-string in <i>outputbuffer</i>, replacing one or more parts that were matched
-with the <i>replacement</i> string, whose length is supplied in <b>rlength</b>.
-This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
-The default is to perform just one replacement, but there is an option that
-requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
-</P>
-<P>
-Matches in which a \K item in a lookahead in the pattern causes the match to
-end before it starts are not supported, and give rise to an error return. For
-global replacements, matches in which \K in a lookbehind causes the match to
-start earlier than the point that was reached in the previous iteration are
-also not supported.
+string in <i>outputbuffer</i>, replacing the part that was matched with the
+<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
+be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
+which a \K item in a lookahead in the pattern causes the match to end before
+it starts are not supported, and give rise to an error return. For global
+replacements, matches in which \K in a lookbehind causes the match to start
+earlier than the point that was reached in the previous iteration are also not
+supported.
</P>
<P>
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
@@ -3271,9 +3194,9 @@ allocate memory for the compiled code.
</P>
<P>
If an external <i>match_data</i> block is provided, its contents afterwards
-are those set by the final call to <b>pcre2_match()</b>. For global changes,
-this will have ended in a matching error. The contents of the ovector within
-the match data block may or may not have been changed.
+are those set by the final call to <b>pcre2_match()</b>, which will have
+ended in a matching error. The contents of the ovector within the match data
+block may or may not have been changed.
</P>
<P>
The <i>outlengthptr</i> argument must point to a variable that contains the
@@ -3297,12 +3220,12 @@ length is in code units, not bytes.
In the replacement string, which is interpreted as a UTF string in UTF mode,
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
dollar character is an escape character that can specify the insertion of
-characters from capture groups or names from (*MARK) or other control verbs
-in the pattern. The following forms are always recognized:
+characters from capturing groups or (*MARK), (*PRUNE), or (*THEN) items in the
+pattern. The following forms are always recognized:
<pre>
$$ insert a dollar character
$&#60;n&#62; or ${&#60;n&#62;} insert the contents of group &#60;n&#62;
- $*MARK or ${*MARK} insert a control verb name
+ $*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
</pre>
Either a group number or a group name can be given for &#60;n&#62;. Curly brackets are
required only if the following character would be interpreted as part of the
@@ -3311,11 +3234,11 @@ For example, if the pattern a(b)c is matched with "=abc=" and the replacement
string "+$1$0$1+", the result is "=+babcb+=".
</P>
<P>
-$*MARK inserts the name from the last encountered backtracking control verb on
-the matching path that has a name. (*MARK) must always include a name, but the
-other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name
-inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B". This
-facility can be used to perform simple simultaneous substitutions, as this
+$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or (*THEN)
+on the matching path that has a name. (*MARK) must always include a name, but
+(*PRUNE) and (*THEN) need not. For example, in the case of (*MARK:A)(*PRUNE)
+the name inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
+This facility can be used to perform simple simultaneous substitutions, as this
<b>pcre2test</b> example shows:
<pre>
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
@@ -3366,13 +3289,13 @@ efficient to allocate a large buffer and free the excess afterwards, instead of
using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
</P>
<P>
-PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups that do
not appear in the pattern to be treated as unset groups. This option should be
used with care, because it means that a typo in a group name or number no
longer causes the PCRE2_ERROR_NOSUBSTRING error.
</P>
<P>
-PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including unknown
+PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including unknown
groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty
strings when inserted as described above. If this option is not set, an attempt
to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does
@@ -3400,18 +3323,16 @@ terminating a \Q quoted sequence) reverts to no case forcing. The sequences
\u and \l force the next character (if it is a letter) to upper or lower
case, respectively, and then the state automatically reverts to no case
forcing. Case forcing applies to all inserted characters, including those from
-capture groups and letters within \Q...\E quoted sequences.
+captured groups and letters within \Q...\E quoted sequences.
</P>
<P>
Note that case forcing sequences such as \U...\E do not nest. For example,
the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no
-effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
-not apply to replacement strings.
+effect.
</P>
<P>
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
-flexibility to capture group substitution. The syntax is similar to that used
-by Bash:
+flexibility to group substitution. The syntax is similar to that used by Bash:
<pre>
${&#60;n&#62;:-&#60;string&#62;}
${&#60;n&#62;:+&#60;string1&#62;:&#60;string2&#62;}
@@ -3439,9 +3360,9 @@ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
groups in the extended syntax forms to be treated as unset.
</P>
<P>
-If successful, <b>pcre2_substitute()</b> returns the number of successful
-matches. This may be zero if no matches were found, and is never greater than 1
-unless PCRE2_SUBSTITUTE_GLOBAL is set.
+If successful, <b>pcre2_substitute()</b> returns the number of replacements that
+were made. This may be zero if no matches were found, and is never greater than
+1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
</P>
<P>
In the event of an error, a negative error code is returned. Except for
@@ -3478,84 +3399,20 @@ obtained by calling the <b>pcre2_get_error_message()</b> function (see
"Obtaining a textual error message"
<a href="#geterrormessage">above).</a>
</P>
-<br><b>
-Substitution callouts
-</b><br>
-<P>
-<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
-<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
-<b> void *<i>callout_data</i>);</b>
-<br>
-<br>
-The <b>pcre2_set_substitute_callout()</b> function can be used to specify a
-callout function for <b>pcre2_substitute()</b>. This information is passed in
-a match context. The callout function is called after each substitution has
-been processed, but it can cause the replacement not to happen. The callout
-function is not called for simulated substitutions that happen as a result of
-the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
-</P>
-<P>
-The first argument of the callout function is a pointer to a substitute callout
-block structure, which contains the following fields, not necessarily in this
-order:
-<pre>
- uint32_t <i>version</i>;
- uint32_t <i>subscount</i>;
- PCRE2_SPTR <i>input</i>;
- PCRE2_SPTR <i>output</i>;
- PCRE2_SIZE <i>*ovector</i>;
- uint32_t <i>oveccount</i>;
- PCRE2_SIZE <i>output_offsets[2]</i>;
-</pre>
-The <i>version</i> field contains the version number of the block format. The
-current version is 0. The version number will increase in future if more fields
-are added, but the intention is never to remove any of the existing fields.
-</P>
-<P>
-The <i>subscount</i> field is the number of the current match. It is 1 for the
-first callout, 2 for the second, and so on. The <i>input</i> and <i>output</i>
-pointers are copies of the values passed to <b>pcre2_substitute()</b>.
-</P>
-<P>
-The <i>ovector</i> field points to the ovector, which contains the result of the
-most recent match. The <i>oveccount</i> field contains the number of pairs that
-are set in the ovector, and is always greater than zero.
-</P>
-<P>
-The <i>output_offsets</i> vector contains the offsets of the replacement in the
-output string. This has already been processed for dollar and (if requested)
-backslash substitutions as described above.
-</P>
-<P>
-The second argument of the callout function is the value passed as
-<i>callout_data</i> when the function was registered. The value returned by the
-callout function is interpreted as follows:
-</P>
-<P>
-If the value is zero, the replacement is accepted, and, if
-PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next
-match. If the value is not zero, the current replacement is not accepted. If
-the value is greater than zero, processing continues when
-PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero or
-PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the
-output and the call to <b>pcre2_substitute()</b> exits, returning the number of
-matches so far.
-</P>
-<br><a name="SEC37" href="#TOC1">DUPLICATE CAPTURE GROUP NAMES</a><br>
+<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
<P>
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
</P>
<P>
-When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture
-groups are not required to be unique. Duplicate names are always allowed for
-groups with the same number, created by using the (?| feature. Indeed, if such
-groups are named, they are required to use the same names.
+When a pattern is compiled with the PCRE2_DUPNAMES option, names for
+subpatterns are not required to be unique. Duplicate names are always allowed
+for subpatterns with the same number, created by using the (?| feature. Indeed,
+if such subpatterns are named, they are required to use the same names.
</P>
<P>
-Normally, patterns that use duplicate names are such that in any one match,
-only one of each set of identically-named groups participates. An example is
-shown in the
+Normally, patterns with duplicate names are such that in any one match, only
+one of the named subpatterns participates. An example is shown in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation.
</P>
@@ -3660,12 +3517,11 @@ Option bits for <b>pcre_dfa_match()</b>
</b><br>
<P>
The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
-be zero. The only bits that may be set are PCRE2_ANCHORED,
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
-PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD,
-PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last
-four of these are exactly the same as for <b>pcre2_match()</b>, so their
-description is not repeated here.
+be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST,
+and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as
+for <b>pcre2_match()</b>, so their description is not repeated here.
<pre>
PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT
@@ -3727,8 +3583,9 @@ the three matched strings are
On success, the yield of the function is a number greater than zero, which is
the number of matched substrings. The offsets of the substrings are returned in
the ovector, and can be extracted by number in the same way as for
-<b>pcre2_match()</b>, but the numbers bear no relation to any capture groups
-that may exist in the pattern, because DFA matching does not support capturing.
+<b>pcre2_match()</b>, but the numbers bear no relation to any capturing groups
+that may exist in the pattern, because DFA matching does not support group
+capture.
</P>
<P>
Calls to the convenience functions that extract substrings by name
@@ -3770,7 +3627,7 @@ a backreference.
</pre>
This return is given if <b>pcre2_dfa_match()</b> encounters a condition item
that uses a backreference for the condition, or a test for recursion in a
-specific capture group. These are not supported.
+specific group. These are not supported.
<pre>
PCRE2_ERROR_DFA_WSSIZE
</pre>
@@ -3779,9 +3636,9 @@ This return is given if <b>pcre2_dfa_match()</b> runs out of space in the
<pre>
PCRE2_ERROR_DFA_RECURSE
</pre>
-When a recursion or subroutine call is processed, the matching function calls
-itself recursively, using private memory for the ovector and <i>workspace</i>.
-This error is given if the internal ovector is not large enough. This should be
+When a recursive subpattern is processed, the matching function calls itself
+recursively, using private memory for the ovector and <i>workspace</i>. This
+error is given if the internal ovector is not large enough. This should be
extremely rare, as a vector of size 1000 is used.
<pre>
PCRE2_ERROR_DFA_BADRESTART
@@ -3808,9 +3665,9 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 14 February 2019
+Last updated: 07 September 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2build.html b/dist2/doc/html/pcre2build.html
index 13d9da22..e31b604c 100644
--- a/dist2/doc/html/pcre2build.html
+++ b/dist2/doc/html/pcre2build.html
@@ -33,12 +33,11 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC18" href="#SEC18">INCLUDING DEBUGGING CODE</a>
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
-<li><a name="TOC21" href="#SEC21">DISABLING THE Z AND T FORMATTING MODIFIERS</a>
-<li><a name="TOC22" href="#SEC22">SUPPORT FOR FUZZERS</a>
-<li><a name="TOC23" href="#SEC23">OBSOLETE OPTION</a>
-<li><a name="TOC24" href="#SEC24">SEE ALSO</a>
-<li><a name="TOC25" href="#SEC25">AUTHOR</a>
-<li><a name="TOC26" href="#SEC26">REVISION</a>
+<li><a name="TOC21" href="#SEC21">SUPPORT FOR FUZZERS</a>
+<li><a name="TOC22" href="#SEC22">OBSOLETE OPTION</a>
+<li><a name="TOC23" href="#SEC23">SEE ALSO</a>
+<li><a name="TOC24" href="#SEC24">AUTHOR</a>
+<li><a name="TOC25" href="#SEC25">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
<P>
@@ -376,15 +375,12 @@ environment.
</P>
<br><a name="SEC14" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
<P>
-By default <b>pcre2grep</b> supports the use of callouts with string arguments
-within the patterns it is matching. There are two kinds: one that generates
-output using local code, and another that calls an external program or script.
-If --disable-pcre2grep-callout-fork is added to the <b>configure</b> command,
-only the first kind of callout is supported; if --disable-pcre2grep-callout is
-used, all callouts are completely ignored. For more details of <b>pcre2grep</b>
-callouts, see the
+By default, on non-Windows systems, <b>pcre2grep</b> supports the use of
+callouts with string arguments within the patterns it is matching, in order to
+run external scripts. For details, see the
<a href="pcre2grep.html"><b>pcre2grep</b></a>
-documentation.
+documentation. This support can be disabled by adding
+--disable-pcre2grep-callout to the <b>configure</b> command.
</P>
<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
<P>
@@ -527,21 +523,7 @@ This cleans all coverage data including the generated coverage report. For more
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
documentation.
</P>
-<br><a name="SEC21" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
-<P>
-The C99 standard defines formatting modifiers z and t for size_t and
-ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-defined and has a value greater than or equal to 199901L (indicating C99).
-However, there is at least one environment that claims to be C99 but does not
-support these modifiers. If
-<pre>
- --disable-percent-zt
-</pre>
-is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
-%lu is used, with a cast for size_t values.
-</P>
-<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
+<br><a name="SEC21" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
<P>
There is a special option for use by people who want to run fuzzing tests on
PCRE2:
@@ -565,7 +547,7 @@ arguments: if an argument starts with "=" the rest of it is a literal input
string. Otherwise, it is assumed to be a file name, and the contents of the
file are the test string.
</P>
-<br><a name="SEC23" href="#TOC1">OBSOLETE OPTION</a><br>
+<br><a name="SEC22" href="#TOC1">OBSOLETE OPTION</a><br>
<P>
In versions of PCRE2 prior to 10.30, there were two ways of handling
backtracking in the <b>pcre2_match()</b> function. The default was to use the
@@ -577,11 +559,11 @@ was set, memory on the heap was used. From release 10.30 onwards this has
changed (the stack is no longer used) and this option now does nothing except
give a warning.
</P>
-<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC23" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
</P>
-<br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC24" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -590,11 +572,11 @@ University Computing Service
Cambridge, England.
<br>
</P>
-<br><a name="SEC26" href="#TOC1">REVISION</a><br>
+<br><a name="SEC25" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 03 March 2019
+Last updated: 26 April 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2callout.html b/dist2/doc/html/pcre2callout.html
index 65db9336..9b6ae6fd 100644
--- a/dist2/doc/html/pcre2callout.html
+++ b/dist2/doc/html/pcre2callout.html
@@ -44,14 +44,6 @@ a match context (see <b>pcre2_set_callout()</b> in the
documentation).
</P>
<P>
-When using the <b>pcre2_substitute()</b> function, an additional callout feature
-is available. This does a callout after each change to the subject string and
-is described in the
-<a href="pcre2api.html"><b>pcre2api</b></a>
-documentation; the rest of this document is concerned with callouts during
-pattern matching.
-</P>
-<P>
Within a regular expression, (?C&#60;arg&#62;) indicates a point at which the external
function is to be called. Different callout points can be identified by putting
a number less than 256 after the letter C. The default value is zero.
@@ -151,7 +143,7 @@ branch, automatic anchoring occurs if all branches are anchorable.
</P>
<P>
This optimization is disabled, however, if .* is in an atomic group or if there
-is a backreference to the capture group in which it appears. It is also
+is a backreference to the capturing group in which it appears. It is also
disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of
callouts does not affect it.
</P>
@@ -354,8 +346,8 @@ callout before an assertion such as (?=ab) the length is 3. For an
alternation bar or a closing parenthesis, the length is one, unless a closing
parenthesis is followed by a quantifier, in which case its length is included.
(This changed in release 10.23. In earlier releases, before an opening
-parenthesis the length was that of the entire group, and before an alternation
-bar or a closing parenthesis the length was zero.)
+parenthesis the length was that of the entire subpattern, and before an
+alternation bar or a closing parenthesis the length was zero.)
</P>
<P>
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
@@ -471,9 +463,9 @@ Cambridge, England.
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 03 February 2019
+Last updated: 26 April 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2compat.html b/dist2/doc/html/pcre2compat.html
index 7d728f58..f5932049 100644
--- a/dist2/doc/html/pcre2compat.html
+++ b/dist2/doc/html/pcre2compat.html
@@ -36,9 +36,10 @@ assertion just once). Perl allows some repeat quantifiers on other assertions,
for example, \b* (but not \b{3}), but these do not seem to have any use.
</P>
<P>
-3. Capture groups that occur inside negative lookaround assertions are counted,
-but their entries in the offsets vector are set only when a negative assertion
-is a condition that has a matching branch (that is, the condition is false).
+3. Capturing subpatterns that occur inside negative lookaround assertions are
+counted, but their entries in the offsets vector are set only when a negative
+assertion is a condition that has a matching branch (that is, the condition is
+false).
</P>
<P>
4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
@@ -47,9 +48,8 @@ non-newline character, and \N{U+dd..}, matching a Unicode code point, are
supported. The escapes that modify the case of following letters are
implemented by Perl's general string-handling and are not part of its pattern
matching engine. If any of these are encountered by PCRE2, an error is
-generated by default. However, if either of the PCRE2_ALT_BSUX or
-PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript
-interprets them.
+generated by default. However, if the PCRE2_ALT_BSUX option is set, \U and \u
+are interpreted as ECMAScript interprets them.
</P>
<P>
5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
@@ -94,13 +94,13 @@ to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
into subroutine calls is now supported, as in Perl.
</P>
<P>
-9. If any of the backtracking control verbs are used in a group that is called
-as a subroutine (whether or not recursively), their effect is confined to that
-group; it does not extend to the surrounding pattern. This is not always the
-case in Perl. In particular, if (*THEN) is present in a group that is called as
-a subroutine, its action is limited to that group, even if the group does not
-contain any | characters. Note that such groups are processed as anchored
-at the point where they are tested.
+9. If any of the backtracking control verbs are used in a subpattern that is
+called as a subroutine (whether or not recursively), their effect is confined
+to that subpattern; it does not extend to the surrounding pattern. This is not
+always the case in Perl. In particular, if (*THEN) is present in a group that
+is called as a subroutine, its action is limited to that group, even if the
+group does not contain any | characters. Note that such subpatterns are
+processed as anchored at the point where they are tested.
</P>
<P>
10. If a pattern contains more than one backtracking control verb, the first
@@ -120,21 +120,22 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
"b".
</P>
<P>
-13. PCRE2's handling of duplicate capture group numbers and names is not as
-general as Perl's. This is a consequence of the fact that PCRE2 works internally
-just with numbers, using an external table to translate between numbers and
-names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B)), where the two
-capture groups have the same number but different names, is not supported, and
-causes an error at compile time. If it were allowed, it would not be possible
-to distinguish which group matched, because both names map to capture group
-number 1. To avoid this confusing situation, an error is given at compile time.
+13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
+names is not as general as Perl's. This is a consequence of the fact that PCRE2
+works internally just with numbers, using an external table to translate
+between numbers and names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B)),
+where the two capturing parentheses have the same number but different names,
+is not supported, and causes an error at compile time. If it were allowed, it
+would not be possible to distinguish which parentheses matched, because both
+names map to capturing subpattern number 1. To avoid this confusing situation,
+an error is given at compile time.
</P>
<P>
14. Perl used to recognize comments in some places that PCRE2 does not, for
-example, between the ( and ? at the start of a group. If the /x modifier is
-set, Perl allowed white space between ( and ? though the latest Perls give an
-error (for a while it was just deprecated). There may still be some cases where
-Perl behaves differently.
+example, between the ( and ? at the start of a subpattern. If the /x modifier
+is set, Perl allowed white space between ( and ? though the latest Perls give
+an error (for a while it was just deprecated). There may still be some cases
+where Perl behaves differently.
</P>
<P>
15. Perl, when in warning mode, gives warnings for character classes such as
@@ -234,9 +235,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
-Last updated: 12 February 2019
+Last updated: 28 July 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2grep.html b/dist2/doc/html/pcre2grep.html
index d66cee34..272b87d1 100644
--- a/dist2/doc/html/pcre2grep.html
+++ b/dist2/doc/html/pcre2grep.html
@@ -853,12 +853,10 @@ character. Otherwise <b>pcre2grep</b> will assume that it has no data.
<P>
<b>pcre2grep</b> has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of PCRE2's
-callout facility. However, this support can be completely or partially disabled
-when <b>pcre2grep</b> is built. You can find out whether your binary has support
-for callouts by running it with the <b>--help</b> option. If callout support is
-completely disabled, all callouts in patterns are ignored by <b>pcre2grep</b>.
-If the facility is partially disabled, calling external programs is not
-supported, and callouts that request it are ignored.
+callout facility. However, this support can be disabled when <b>pcre2grep</b> is
+built. You can find out whether your binary has support for callouts by running
+it with the <b>--help</b> option. If the support is not enabled, all callouts in
+patterns are ignored by <b>pcre2grep</b>.
</P>
<P>
A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is
@@ -871,12 +869,6 @@ only callouts with string arguments are useful.
Calling external programs or scripts
</b><br>
<P>
-This facility can be independently disabled when <b>pcre2grep</b> is built. It
-is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
-where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
-<b>fork()</b> and <b>execv()</b> are available.
-</P>
-<P>
If the callout string does not start with a pipe (vertical bar) character, it
is parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying
@@ -905,7 +897,7 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
Arg1: [1] [234] [4] Arg2: |1| ()
12345
</pre>
-The parameters for the system call that is used to run the
+The parameters for the <b>execv()</b> system call that is used to run the
program or script are zero-terminated strings. This means that binary zero
characters in the callout argument will cause premature termination of their
substrings, and therefore should not be present. Any syntax errors in the
@@ -918,15 +910,14 @@ matcher backtracks in the normal way.
Echoing a specific string
</b><br>
<P>
-This facility is always available, provided that callouts were not completely
-disabled when <b>pcre2grep</b> was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
+If the callout string starts with a pipe (vertical bar) character, the rest of
+the string is written to the output, having been passed through the same escape
+processing as text from the --output option. This provides a simple echoing
+facility that avoids calling an external program or script. No terminator is
+added to the string, so if you want a newline, you must include it explicitly.
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+relevant pattern with (*FAIL).
</P>
<br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
<P>
@@ -971,7 +962,7 @@ Cambridge, England.
</P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 24 November 2018
+Last updated: 24 February 2018
<br>
Copyright &copy; 1997-2018 University of Cambridge.
<br>
diff --git a/dist2/doc/html/pcre2jit.html b/dist2/doc/html/pcre2jit.html
index cb4eb88e..fa007e0b 100644
--- a/dist2/doc/html/pcre2jit.html
+++ b/dist2/doc/html/pcre2jit.html
@@ -16,17 +16,16 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC1" href="#SEC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC2" href="#SEC2">AVAILABILITY OF JIT SUPPORT</a>
<li><a name="TOC3" href="#SEC3">SIMPLE USE OF JIT</a>
-<li><a name="TOC4" href="#SEC4">MATCHING SUBJECTS CONTAINING INVALID UTF</a>
-<li><a name="TOC5" href="#SEC5">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
-<li><a name="TOC6" href="#SEC6">RETURN VALUES FROM JIT MATCHING</a>
-<li><a name="TOC7" href="#SEC7">CONTROLLING THE JIT STACK</a>
-<li><a name="TOC8" href="#SEC8">JIT STACK FAQ</a>
-<li><a name="TOC9" href="#SEC9">FREEING JIT SPECULATIVE MEMORY</a>
-<li><a name="TOC10" href="#SEC10">EXAMPLE CODE</a>
-<li><a name="TOC11" href="#SEC11">JIT FAST PATH API</a>
-<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
-<li><a name="TOC13" href="#SEC13">AUTHOR</a>
-<li><a name="TOC14" href="#SEC14">REVISION</a>
+<li><a name="TOC4" href="#SEC4">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
+<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
+<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
+<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
+<li><a name="TOC8" href="#SEC8">FREEING JIT SPECULATIVE MEMORY</a>
+<li><a name="TOC9" href="#SEC9">EXAMPLE CODE</a>
+<li><a name="TOC10" href="#SEC10">JIT FAST PATH API</a>
+<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
+<li><a name="TOC12" href="#SEC12">AUTHOR</a>
+<li><a name="TOC13" href="#SEC13">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
<P>
@@ -145,35 +144,12 @@ support is not available, or the pattern was not processed by
<b>pcre2_jit_compile()</b>, or the JIT compiler was not able to handle the
pattern.
</P>
-<br><a name="SEC4" href="#TOC1">MATCHING SUBJECTS CONTAINING INVALID UTF</a><br>
-<P>
-When a pattern is compiled with the PCRE2_UTF option, the interpretive matching
-function expects its subject string to be a valid sequence of UTF code units.
-If it is not, the result is undefined. This is also true by default of matching
-via JIT. However, if the option PCRE2_JIT_INVALID_UTF is passed to
-<b>pcre2_jit_compile()</b>, code that can process a subject containing invalid
-UTF is compiled.
-</P>
-<P>
-In this mode, an invalid code unit sequence never matches any pattern item. It
-does not match dot, it does not match \p{Any}, it does not even match negative
-items such as [^X]. A lookbehind assertion fails if it encounters an invalid
-sequence while moving the current point backwards. In other words, an invalid
-UTF code unit sequence acts as a barrier which no match can cross. Reaching an
-invalid sequence causes an immediate backtrack.
-</P>
-<P>
-Using this option, an application can run matches in arbitrary data, knowing
-that any matched strings that are returned will be valid UTF. This can be
-useful when searching for text in executable or other binary files.
-</P>
-<br><a name="SEC5" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
+<br><a name="SEC4" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
<P>
The <b>pcre2_match()</b> options that are supported for JIT matching are
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
-PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and
-PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not
-supported at match time.
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
+PCRE2_ANCHORED option is not supported at match time.
</P>
<P>
If the PCRE2_NO_JIT option is passed to <b>pcre2_match()</b> it disables the
@@ -184,7 +160,7 @@ The only unsupported pattern items are \C (match a single data unit) when
running in a UTF mode, and a callout immediately before an assertion condition
in a conditional group.
</P>
-<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
+<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
<P>
When a pattern is matched using JIT matching, the return values are the same
as those given by the interpretive <b>pcre2_match()</b> code, with the addition
@@ -200,7 +176,7 @@ circumstance when JIT is not used, but the details of exactly what is counted
are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned
when JIT matching is used.
<a name="stackcontrol"></a></P>
-<br><a name="SEC7" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
+<br><a name="SEC6" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
<P>
When the compiled JIT code runs, it needs a block of memory to use as a stack.
By default, it uses 32KiB on the machine stack. However, some large or
@@ -293,7 +269,7 @@ non-default JIT stacks might operate:
</pre>
All the functions described in this section do nothing if JIT is not available.
<a name="stackfaq"></a></P>
-<br><a name="SEC8" href="#TOC1">JIT STACK FAQ</a><br>
+<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
<P>
(1) Why do we need JIT stacks?
<br>
@@ -372,7 +348,7 @@ stack handling?
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
out this complicated API.
</P>
-<br><a name="SEC9" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
+<br><a name="SEC8" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
<P>
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
</P>
@@ -384,7 +360,7 @@ all possible memory. You can cause this to happen by calling
pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
memory management, or NULL for standard memory management.
</P>
-<br><a name="SEC10" href="#TOC1">EXAMPLE CODE</a><br>
+<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
<P>
This is a single-threaded example that specifies a JIT stack without using a
callback. A real program should include error checking after all the function
@@ -413,7 +389,7 @@ calls.
</PRE>
</P>
-<br><a name="SEC11" href="#TOC1">JIT FAST PATH API</a><br>
+<br><a name="SEC10" href="#TOC1">JIT FAST PATH API</a><br>
<P>
Because the API described above falls back to interpreted matching when JIT is
not available, it is convenient for programs that are written for general use
@@ -426,13 +402,10 @@ processed by <b>pcre2_jit_compile()</b>).
</P>
<P>
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
-the same arguments as <b>pcre2_match()</b>. However, the subject string must be
-specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported
-option bits (for example, PCRE2_ANCHORED, PCRE2_ENDANCHORED and
-PCRE2_COPY_MATCHED_SUBJECT) are ignored, as is the PCRE2_NO_JIT option. The
-return values are also the same as for <b>pcre2_match()</b>, plus
-PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
-that was not compiled.
+the same arguments as <b>pcre2_match()</b>. The return values are also the same,
+plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
+requested that was not compiled. Unsupported option bits (for example,
+PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT option.
</P>
<P>
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
@@ -446,11 +419,11 @@ invalid data is passed, the result is undefined.
Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
speedups of more than 10%.
</P>
-<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2api</b>(3)
</P>
-<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel (FAQ by Zoltan Herczeg)
<br>
@@ -459,11 +432,11 @@ University Computing Service
Cambridge, England.
<br>
</P>
-<br><a name="SEC14" href="#TOC1">REVISION</a><br>
+<br><a name="SEC13" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 06 March 2019
+Last updated: 28 June 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2limits.html b/dist2/doc/html/pcre2limits.html
index c8bc01b8..d90cdc3d 100644
--- a/dist2/doc/html/pcre2limits.html
+++ b/dist2/doc/html/pcre2limits.html
@@ -50,17 +50,17 @@ All values in repeating quantifiers must be less than 65536.
The maximum length of a lookbehind assertion is 65535 characters.
</P>
<P>
-There is no limit to the number of parenthesized groups, but there can be no
-more than 65535 capture groups, and there is a limit to the depth of nesting of
-parenthesized subpatterns of all kinds. This is imposed in order to limit the
-amount of system stack used at compile time. The default limit can be specified
-when PCRE2 is built; if not, the default is set to 250. An application can
-change this limit by calling pcre2_set_parens_nest_limit() to set the limit in
-a compile context.
+There is no limit to the number of parenthesized subpatterns, but there can be
+no more than 65535 capturing subpatterns. There is, however, a limit to the
+depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
+order to limit the amount of system stack used at compile time. The default
+limit can be specified when PCRE2 is built; if not, the default is set to 250.
+An application can change this limit by calling pcre2_set_parens_nest_limit()
+to set the limit in a compile context.
</P>
<P>
-The maximum length of name for a named capture group is 32 code units, and the
-maximum number of such groups is 10000.
+The maximum length of name for a named subpattern is 32 code units, and the
+maximum number of named subpatterns is 10000.
</P>
<P>
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
@@ -86,9 +86,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
-Last updated: 02 February 2019
+Last updated: 30 March 2017
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2017 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2matching.html b/dist2/doc/html/pcre2matching.html
index 3e32cb57..602f3b26 100644
--- a/dist2/doc/html/pcre2matching.html
+++ b/dist2/doc/html/pcre2matching.html
@@ -134,8 +134,7 @@ do want multiple matches in such cases, either use an ungreedy repeat
</P>
<P>
There are a number of features of PCRE2 regular expressions that are not
-supported or behave differently in the alternative matching function. Those
-that are not supported cause an error if encountered.
+supported by the alternative matching algorithm. They are as follows:
</P>
<P>
1. Because the algorithm finds all possible matches, the greedy or ungreedy
@@ -160,32 +159,29 @@ do this. This means that no captured substrings are available.
</P>
<P>
3. Because no substrings are captured, backreferences within the pattern are
-not supported.
+not supported, and cause errors if encountered.
</P>
<P>
4. For the same reason, conditional expressions that use a backreference as the
condition or test for a specific group recursion are not supported.
</P>
<P>
-5. Again for the same reason, script runs are not supported.
-</P>
-<P>
-6. Because many paths through the tree may be active, the \K escape sequence,
+5. Because many paths through the tree may be active, the \K escape sequence,
which resets the start of the match when encountered (but may be on some paths
-and not on others), is not supported.
+and not on others), is not supported. It causes an error if encountered.
</P>
<P>
-7. Callouts are supported, but the value of the <i>capture_top</i> field is
+6. Callouts are supported, but the value of the <i>capture_top</i> field is
always 1, and the value of the <i>capture_last</i> field is always 0.
</P>
<P>
-8. The \C escape sequence, which (in the standard algorithm) always matches a
+7. The \C escape sequence, which (in the standard algorithm) always matches a
single code unit, even in a UTF mode, is not supported in these modes, because
the alternative algorithm moves through the subject string one character (not
code unit) at a time, for all active paths through the tree.
</P>
<P>
-9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
+8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
</P>
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
@@ -219,7 +215,7 @@ because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
</P>
<P>
-2. Capturing parentheses, backreferences, and script runs are not supported.
+2. Capturing parentheses and backreferences are not supported.
</P>
<P>
3. Although atomic groups are supported, their use does not provide the
@@ -236,9 +232,9 @@ Cambridge, England.
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 10 October 2018
+Last updated: 29 September 2014
<br>
-Copyright &copy; 1997-2018 University of Cambridge.
+Copyright &copy; 1997-2014 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2pattern.html b/dist2/doc/html/pcre2pattern.html
index e6958c13..e43e98e4 100644
--- a/dist2/doc/html/pcre2pattern.html
+++ b/dist2/doc/html/pcre2pattern.html
@@ -26,24 +26,23 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC11" href="#SEC11">COMPATIBILITY FEATURE FOR WORD BOUNDARIES</a>
<li><a name="TOC12" href="#SEC12">VERTICAL BAR</a>
<li><a name="TOC13" href="#SEC13">INTERNAL OPTION SETTING</a>
-<li><a name="TOC14" href="#SEC14">GROUPS</a>
-<li><a name="TOC15" href="#SEC15">DUPLICATE GROUP NUMBERS</a>
-<li><a name="TOC16" href="#SEC16">NAMED CAPTURE GROUPS</a>
+<li><a name="TOC14" href="#SEC14">SUBPATTERNS</a>
+<li><a name="TOC15" href="#SEC15">DUPLICATE SUBPATTERN NUMBERS</a>
+<li><a name="TOC16" href="#SEC16">NAMED SUBPATTERNS</a>
<li><a name="TOC17" href="#SEC17">REPETITION</a>
<li><a name="TOC18" href="#SEC18">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
<li><a name="TOC19" href="#SEC19">BACKREFERENCES</a>
<li><a name="TOC20" href="#SEC20">ASSERTIONS</a>
-<li><a name="TOC21" href="#SEC21">SCRIPT RUNS</a>
-<li><a name="TOC22" href="#SEC22">CONDITIONAL GROUPS</a>
-<li><a name="TOC23" href="#SEC23">COMMENTS</a>
-<li><a name="TOC24" href="#SEC24">RECURSIVE PATTERNS</a>
-<li><a name="TOC25" href="#SEC25">GROUPS AS SUBROUTINES</a>
-<li><a name="TOC26" href="#SEC26">ONIGURUMA SUBROUTINE SYNTAX</a>
-<li><a name="TOC27" href="#SEC27">CALLOUTS</a>
-<li><a name="TOC28" href="#SEC28">BACKTRACKING CONTROL</a>
-<li><a name="TOC29" href="#SEC29">SEE ALSO</a>
-<li><a name="TOC30" href="#SEC30">AUTHOR</a>
-<li><a name="TOC31" href="#SEC31">REVISION</a>
+<li><a name="TOC21" href="#SEC21">CONDITIONAL SUBPATTERNS</a>
+<li><a name="TOC22" href="#SEC22">COMMENTS</a>
+<li><a name="TOC23" href="#SEC23">RECURSIVE PATTERNS</a>
+<li><a name="TOC24" href="#SEC24">SUBPATTERNS AS SUBROUTINES</a>
+<li><a name="TOC25" href="#SEC25">ONIGURUMA SUBROUTINE SYNTAX</a>
+<li><a name="TOC26" href="#SEC26">CALLOUTS</a>
+<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a>
+<li><a name="TOC28" href="#SEC28">SEE ALSO</a>
+<li><a name="TOC29" href="#SEC29">AUTHOR</a>
+<li><a name="TOC30" href="#SEC30">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION DETAILS</a><br>
<P>
@@ -63,13 +62,13 @@ by O'Reilly, covers regular expressions in great detail. This description of
PCRE2's regular expressions is intended as reference material.
</P>
<P>
-This document discusses the regular expression patterns that are supported by
-PCRE2 when its main matching function, <b>pcre2_match()</b>, is used. PCRE2 also
-has an alternative matching function, <b>pcre2_dfa_match()</b>, which matches
-using a different algorithm that is not Perl-compatible. Some of the features
-discussed below are not available when DFA matching is used. The advantages and
-disadvantages of the alternative function, and how it differs from the normal
-function, are discussed in the
+This document discusses the patterns that are supported by PCRE2 when its main
+matching function, <b>pcre2_match()</b>, is used. PCRE2 also has an alternative
+matching function, <b>pcre2_dfa_match()</b>, which matches using a different
+algorithm that is not Perl-compatible. Some of the features discussed below are
+not available when DFA matching is used. The advantages and disadvantages of
+the alternative function, and how it differs from the normal function, are
+discussed in the
<a href="pcre2matching.html"><b>pcre2matching</b></a>
page.
</P>
@@ -183,8 +182,8 @@ also an explicit memory limit that can be set.
</P>
<P>
These facilities are provided to catch runaway matches that are provoked by
-patterns with huge matching trees. A common example is a pattern with nested
-unlimited repeats applied to a long string that does not match. When one of
+patterns with huge matching trees (a typical example is a pattern with nested
+unlimited repeats applied to a long string that does not match). When one of
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
can also be set by items at the start of the pattern of the form
<pre>
@@ -290,10 +289,10 @@ caseless matching is specified (the PCRE2_CASELESS option), letters are matched
independently of case.
</P>
<P>
-The power of regular expressions comes from the ability to include wild cards,
-character classes, alternatives, and repetitions in the pattern. These are
-encoded in the pattern by the use of <i>metacharacters</i>, which do not stand
-for themselves but instead are interpreted in some special way.
+The power of regular expressions comes from the ability to include alternatives
+and repetitions in the pattern. These are encoded in the pattern by the use of
+<i>metacharacters</i>, which do not stand for themselves but instead are
+interpreted in some special way.
</P>
<P>
There are two different sets of metacharacters: those that are recognized
@@ -307,11 +306,14 @@ are as follows:
. match any character except newline (by default)
[ start character class definition
| start of alternative branch
- ( start group or control verb
- ) end group or control verb
+ ( start subpattern
+ ) end subpattern
+ ? extends the meaning of (
+ also 0 or 1 quantifier
+ also quantifier minimizer
* 0 or more quantifier
- + 1 or more quantifier; also "possessive quantifier"
- ? 0 or 1 quantifier; also quantifier minimizer
+ + 1 or more quantifier
+ also "possessive quantifier"
{ start min/max quantifier
</pre>
Part of a pattern that is in square brackets is called a "character class". In
@@ -320,7 +322,7 @@ a character class the only metacharacters are:
\ general escape character
^ negate the class, but only if the first character
- indicates character range
- [ POSIX character class (if followed by POSIX syntax)
+ [ POSIX character class (only if followed by POSIX syntax)
] terminates the character class
</pre>
The following sections describe the use of each of the metacharacters.
@@ -328,7 +330,7 @@ The following sections describe the use of each of the metacharacters.
<br><a name="SEC5" href="#TOC1">BACKSLASH</a><br>
<P>
The backslash character has several uses. Firstly, if it is followed by a
-character that is not a digit or a letter, it takes away any special meaning
+character that is not a digit or a letter, it takes away any special meaning
that character may have. This use of backslash as an escape character applies
both inside and outside character classes.
</P>
@@ -340,7 +342,7 @@ precede a non-alphanumeric with backslash to specify that it stands for itself.
In particular, if you want to match a backslash, you write \\.
</P>
<P>
-In a UTF mode, only ASCII digits and letters have any special meaning after a
+In a UTF mode, only ASCII digits and letters have any special meaning after a
backslash. All other characters (in particular, those whose code points are
greater than 127) are treated as literals.
</P>
@@ -352,13 +354,13 @@ escaping backslash can be used to include a white space or # character as part
of the pattern.
</P>
<P>
-If you want to treat all characters in a sequence as literals, you can do so by
-putting them between \Q and \E. This is different from Perl in that $ and @
-are handled as literals in \Q...\E sequences in PCRE2, whereas in Perl, $ and
-@ cause variable interpolation. Also, Perl does "double-quotish backslash
-interpolation" on any backslashes between \Q and \E which, its documentation
-says, "may lead to confusing results". PCRE2 treats a backslash between \Q and
-\E just like any other character. Note the following examples:
+If you want to remove the special meaning from a sequence of characters, you
+can do so by putting them between \Q and \E. This is different from Perl in
+that $ and @ are handled as literals in \Q...\E sequences in PCRE2, whereas
+in Perl, $ and @ cause variable interpolation. Also, Perl does "double-quotish
+backslash interpolation" on any backslashes between \Q and \E which, its
+documentation says, "may lead to confusing results". PCRE2 treats a backslash
+between \Q and \E just like any other character. Note the following examples:
<pre>
Pattern PCRE2 matches Perl matches
@@ -383,15 +385,15 @@ A second use of backslash provides a way of encoding non-printing characters
in patterns in a visible manner. There is no restriction on the appearance of
non-printing characters in a pattern, but when a pattern is being prepared by
text editing, it is often easier to use one of the following escape sequences
-instead of the binary character it represents. In an ASCII or Unicode
-environment, these escapes are as follows:
+than the binary character it represents. In an ASCII or Unicode environment,
+these escapes are as follows:
<pre>
\a alarm, that is, the BEL character (hex 07)
\cx "control-x", where x is any printable ASCII character
\e escape (hex 1B)
\f form feed (hex 0C)
\n linefeed (hex 0A)
- \r carriage return (hex 0D) (but see below)
+ \r carriage return (hex 0D)
\t tab (hex 09)
\0dd character with octal code 0dd
\ddd character with octal code ddd, or backreference
@@ -399,35 +401,8 @@ environment, these escapes are as follows:
\xhh character with hex code hh
\x{hhh..} character with hex code hhh..
\N{U+hhh..} character with Unicode hex code point hhh..
+ \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
</pre>
-By default, after \x that is not followed by {, from zero to two hexadecimal
-digits are read (letters can be in upper or lower case). Any number of
-hexadecimal digits may appear between \x{ and }. If a character other than a
-hexadecimal digit appears between \x{ and }, or if there is no terminating },
-an error occurs.
-</P>
-<P>
-Characters whose code points are less than 256 can be defined by either of the
-two syntaxes for \x or by an octal sequence. There is no difference in the way
-they are handled. For example, \xdc is exactly the same as \x{dc} or \334.
-However, using the braced versions does make such sequences easier to read.
-</P>
-<P>
-Support is available for some ECMAScript (aka JavaScript) escape sequences via
-two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \x followed
-by { is not recognized. Only if \x is followed by two hexadecimal digits is it
-recognized as a character escape. Otherwise it is interpreted as a literal "x"
-character. In this mode, support for code points greater than 256 is provided
-by \u, which must be followed by four hexadecimal digits; otherwise it is
-interpreted as a literal "u" character.
-</P>
-<P>
-PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
-\u{hhh..} is recognized as the character specified by hexadecimal code point.
-There may be any number of hexadecimal digits. This syntax is from ECMAScript
-6.
-</P>
-<P>
The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
is set, that is, when PCRE2 is operating in a Unicode mode. Perl also uses
\N{name} to specify characters by Unicode name; PCRE2 does not support this.
@@ -435,12 +410,6 @@ Note that when \N is not followed by an opening brace (curly bracket) it has
an entirely different meaning, matching any character that is not a newline.
</P>
<P>
-There are some legacy applications where the escape sequence \r is expected to
-match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option is set, \r in a
-pattern is converted to \n so that it matches a LF (linefeed) instead of a CR
-(carriage return) character.
-</P>
-<P>
The precise effect of \cx on ASCII characters is as follows: if x is a lower
case letter, it is converted to upper case. Then bit 6 of the character (hex
40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is 5A),
@@ -499,12 +468,12 @@ and Perl has changed over time, causing PCRE2 also to change.
<P>
Outside a character class, PCRE2 reads the digit and any following digits as a
decimal number. If the number is less than 10, begins with the digit 8 or 9, or
-if there are at least that many previous capture groups in the expression, the
-entire sequence is taken as a <i>backreference</i>. A description of how this
-works is given
+if there are at least that many previous capturing left parentheses in the
+expression, the entire sequence is taken as a <i>backreference</i>. A
+description of how this works is given
<a href="#backreferences">later,</a>
following the discussion of
-<a href="#group">parenthesized groups.</a>
+<a href="#subpattern">parenthesized subpatterns.</a>
Otherwise, up to three octal digits are read to form a character code.
</P>
<P>
@@ -514,7 +483,7 @@ backslash, using them to generate a data character. Any subsequent digits stand
for themselves. For example, outside a character class:
<pre>
\040 is another way of writing an ASCII space
- \40 is the same, provided there are fewer than 40 previous capture groups
+ \40 is the same, provided there are fewer than 40 previous capturing subpatterns
\7 is always a backreference
\11 might be a backreference, or another way of writing a tab
\011 is always a tab
@@ -527,6 +496,26 @@ Note that octal values of 100 or greater that are specified using this syntax
must not be introduced by a leading zero, because no more than three octal
digits are ever read.
</P>
+<P>
+By default, after \x that is not followed by {, from zero to two hexadecimal
+digits are read (letters can be in upper or lower case). Any number of
+hexadecimal digits may appear between \x{ and }. If a character other than
+a hexadecimal digit appears between \x{ and }, or if there is no terminating
+}, an error occurs.
+</P>
+<P>
+If the PCRE2_ALT_BSUX option is set, the interpretation of \x is as just
+described only when it is followed by two hexadecimal digits. Otherwise, it
+matches a literal "x" character. In this mode, support for code points greater
+than 255 is provided by \u, which must be followed by four hexadecimal digits;
+otherwise it matches a literal "u" character.
+</P>
+<P>
+Characters whose value is less than 256 can be defined by either of the two
+syntaxes for \x (or by \u in PCRE2_ALT_BSUX mode). There is no difference in
+the way they are handled. For example, \xdc is exactly the same as \x{dc} (or
+\u00dc in PCRE2_ALT_BSUX mode).
+</P>
<br><b>
Constraints on character values
</b><br>
@@ -565,10 +554,9 @@ Unsupported escape sequences
<P>
In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its string
handler and used to modify the case of following characters. By default, PCRE2
-does not support these escape sequences in patterns. However, if either of the
-PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U matches a "U"
-character, and \u can be used to define a character by code point, as
-described above.
+does not support these escape sequences. However, if the PCRE2_ALT_BSUX option
+is set, \U matches a "U" character, and \u can be used to define a character
+by code point, as described above.
</P>
<br><b>
Absolute and relative backreferences
@@ -579,7 +567,7 @@ in braces, is an absolute or relative backreference. A named backreference
can be coded as \g{name}. Backreferences are discussed
<a href="#backreferences">later,</a>
following the discussion of
-<a href="#group">parenthesized groups.</a>
+<a href="#subpattern">parenthesized subpatterns.</a>
</P>
<br><b>
Absolute and relative subroutine calls
@@ -587,11 +575,11 @@ Absolute and relative subroutine calls
<P>
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
-syntax for referencing a capture group as a subroutine. Details are discussed
+syntax for referencing a subpattern as a "subroutine". Details are discussed
<a href="#onigurumasubroutines">later.</a>
Note that \g{...} (Perl syntax) and \g&#60;...&#62; (Oniguruma syntax) are <i>not</i>
synonymous. The former is a backreference; the latter is a
-<a href="#groupsassubroutines">subroutine</a>
+<a href="#subpatternsassubroutines">subroutine</a>
call.
<a name="genericchartypes"></a></P>
<br><b>
@@ -758,22 +746,21 @@ Unicode character properties
</b><br>
<P>
When PCRE2 is built with Unicode support (the default), three additional escape
-sequences that match characters with specific properties are available. They
-can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
-sequences are of course limited to testing characters whose code points are
-less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
-greater than 0x10ffff (the Unicode limit) may be encountered. These are all
-treated as being in the Unknown script and with an unassigned type. The extra
-escape sequences are:
+sequences that match characters with specific properties are available. In
+8-bit non-UTF-8 mode, these sequences are of course limited to testing
+characters whose code points are less than 256, but they do work in this mode.
+In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit)
+may be encountered. These are all treated as being in the Common script and
+with an unassigned type. The extra escape sequences are:
<pre>
\p{<i>xx</i>} a character with the <i>xx</i> property
\P{<i>xx</i>} a character without the <i>xx</i> property
\X a Unicode extended grapheme cluster
</pre>
-The property names represented by <i>xx</i> above are case-sensitive. There is
-support for Unicode script names, Unicode general category properties, "Any",
-which matches any character (including newline), and some special PCRE2
-properties (described in the
+The property names represented by <i>xx</i> above are limited to the Unicode
+script names, the general category properties, "Any", which matches any
+character (including newline), and some special PCRE2 properties (described
+in the
<a href="#extraprops">next section).</a>
Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2.
Note that \P{Any} does not match any characters, so always causes a match
@@ -787,10 +774,8 @@ example:
\p{Greek}
\P{Han}
</pre>
-Unassigned characters (and in non-UTF 32-bit mode, characters with code points
-greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
-part of an identified script are lumped together as "Common". The current list
-of scripts is:
+Those that are not part of an identified script are lumped together as
+"Common". The current list of scripts is:
</P>
<P>
Adlam,
@@ -937,7 +922,6 @@ Tibetan,
Tifinagh,
Tirhuta,
Ugaritic,
-Unknown,
Vai,
Warang_Citi,
Yi,
@@ -1009,14 +993,12 @@ the Lu, Ll, or Lt property, in other words, a letter that is not classified as
a modifier or "other".
</P>
<P>
-The Cs (Surrogate) property applies only to characters whose code points are in
-the range U+D800 to U+DFFF. These characters are no different to any other
-character when PCRE2 is not in UTF mode (using the 16-bit or 32-bit library).
-However, they are not valid in Unicode strings and so cannot be tested by PCRE2
-in UTF mode, unless UTF validity checking has been turned off (see the
-discussion of PCRE2_NO_UTF_CHECK in the
+The Cs (Surrogate) property applies only to characters in the range U+D800 to
+U+DFFF. Such characters are not valid in Unicode strings and so
+cannot be tested by PCRE2, unless UTF validity checking has been turned off
+(see the discussion of PCRE2_NO_UTF_CHECK in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-page).
+page). Perl does not support the Cs property.
</P>
<P>
The long synonyms for property names that Perl supports (such as \p{Letter})
@@ -1146,7 +1128,7 @@ a lookbehind assertion
However, in this case, the part of the subject before the real match does not
have to be of fixed length, as lookbehind assertions do. The use of \K does
not interfere with the setting of
-<a href="#group">captured substrings.</a>
+<a href="#subpattern">captured substrings.</a>
For example, when the pattern
<pre>
(foo)\Kbar
@@ -1174,7 +1156,7 @@ Simple assertions
The final use of backslash is for certain simple assertions. An assertion
specifies a condition that has to be met at a particular point in a match,
without consuming any characters from the subject string. The use of
-groups for more complicated assertions is described
+subpatterns for more complicated assertions is described
<a href="#bigassertions">below.</a>
The backslashed assertions are:
<pre>
@@ -1194,12 +1176,12 @@ character. If any other of these assertions appears in a character class, an
A word boundary is a position in the subject string where the current character
and the previous character do not both match \w or \W (i.e. one matches
\w and the other matches \W), or the start or end of the string if the
-first or last character matches \w, respectively. When PCRE2 is built with
-Unicode support, the meanings of \w and \W can be changed by setting the
-PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2
-nor Perl has a separate "start of word" or "end of word" metasequence. However,
-whatever follows \b normally determines which it is. For example, the fragment
-\ba matches "a" at the start of a word.
+first or last character matches \w, respectively. In a UTF mode, the meanings
+of \w and \W can be changed by setting the PCRE2_UCP option. When this is
+done, it also affects \b and \B. Neither PCRE2 nor Perl has a separate "start
+of word" or "end of word" metasequence. However, whatever follows \b normally
+determines which it is. For example, the fragment \ba matches "a" at the start
+of a word.
</P>
<P>
The \A, \Z, and \z assertions differ from the traditional circumflex and
@@ -1394,7 +1376,7 @@ could be used with a UTF-8 string (ignore white space and line breaks):
</pre>
In this example, a group that starts with (?| resets the capturing parentheses
numbers in each alternative (see
-<a href="#dupgroupnumber">"Duplicate Group Numbers"</a>
+<a href="#dupsubpatternnumber">"Duplicate Subpattern Numbers"</a>
below). The assertions at the start of each branch check the next UTF-8
character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The
character's individual bytes are then captured by the appropriate number of
@@ -1640,10 +1622,10 @@ the pattern
matches either "gilbert" or "sullivan". Any number of alternatives may appear,
and an empty alternative is permitted (matching the empty string). The matching
process tries each alternative in turn, from left to right, and the first one
-that succeeds is used. If the alternatives are within a group
-<a href="#group">(defined below),</a>
+that succeeds is used. If the alternatives are within a subpattern
+<a href="#subpattern">(defined below),</a>
"succeeds" means matching the rest of the main pattern as well as the
-alternative in the group.
+alternative in the subpattern.
</P>
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
<P>
@@ -1686,16 +1668,16 @@ respectively. However, these are not unset by (?^).
</P>
<P>
When one of these option changes occurs at top level (that is, not inside
-group parentheses), the change applies to the remainder of the pattern
-that follows. An option change within a group (see below for a description
-of groups) affects only that part of the group that follows it, so
+subpattern parentheses), the change applies to the remainder of the pattern
+that follows. An option change within a subpattern (see below for a description
+of subpatterns) affects only that part of the subpattern that follows it, so
<pre>
(a(?i)b)c
</pre>
matches abc and aBc and no other strings (assuming PCRE2_CASELESS is not used).
By this means, options can be made to have different settings in different
parts of the pattern. Any changes made in one alternative do carry on
-into subsequent branches within the same group. For example,
+into subsequent branches within the same subpattern. For example,
<pre>
(a(?i)b|c)
</pre>
@@ -1706,7 +1688,7 @@ behaviour otherwise.
</P>
<P>
As a convenient shorthand, if any option settings are required at the start of
-a non-capturing group (see the next section), the option letters may
+a non-capturing subpattern (see the next section), the option letters may
appear between the "?" and the ":". Thus the two patterns
<pre>
(?i:saturday|sunday)
@@ -1715,22 +1697,21 @@ appear between the "?" and the ":". Thus the two patterns
match exactly the same set of strings.
</P>
<P>
-<b>Note:</b> There are other PCRE2-specific options, applying to the whole
-pattern, which can be set by the application when the compiling function is
-called. In addition, the pattern can contain special leading sequences such as
-(*CRLF) to override what the application has set or what has been defaulted.
-Details are given in the section entitled
+<b>Note:</b> There are other PCRE2-specific options that can be set by the
+application when the compiling function is called. The pattern can contain
+special leading sequences such as (*CRLF) to override what the application has
+set or what has been defaulted. Details are given in the section entitled
<a href="#newlineseq">"Newline sequences"</a>
above. There are also the (*UTF) and (*UCP) leading sequences that can be used
to set UTF and Unicode property modes; they are equivalent to setting the
PCRE2_UTF and PCRE2_UCP options, respectively. However, the application can set
the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP options, which lock out the use of the
(*UTF) and (*UCP) sequences.
-<a name="group"></a></P>
-<br><a name="SEC14" href="#TOC1">GROUPS</a><br>
+<a name="subpattern"></a></P>
+<br><a name="SEC14" href="#TOC1">SUBPATTERNS</a><br>
<P>
-Groups are delimited by parentheses (round brackets), which can be nested.
-Turning part of a pattern into a group does two things:
+Subpatterns are delimited by parentheses (round brackets), which can be nested.
+Turning part of a pattern into a subpattern does two things:
<br>
<br>
1. It localizes a set of alternatives. For example, the pattern
@@ -1741,16 +1722,16 @@ matches "cataract", "caterpillar", or "cat". Without the parentheses, it would
match "cataract", "erpillar" or an empty string.
<br>
<br>
-2. It creates a "capture group". This means that, when the whole pattern
-matches, the portion of the subject string that matched the group is passed
-back to the caller, separately from the portion that matched the whole pattern.
-(This applies only to the traditional matching function; the DFA matching
-function does not support capturing.)
+2. It sets up the subpattern as a capturing subpattern. This means that, when
+the whole pattern matches, the portion of the subject string that matched the
+subpattern is passed back to the caller, separately from the portion that
+matched the whole pattern. (This applies only to the traditional matching
+function; the DFA matching function does not support capturing.)
</P>
<P>
Opening parentheses are counted from left to right (starting from 1) to obtain
-numbers for capture groups. For example, if the string "the red king" is
-matched against the pattern
+numbers for the capturing subpatterns. For example, if the string "the red
+king" is matched against the pattern
<pre>
the ((red|white) (king|queen))
</pre>
@@ -1759,35 +1740,36 @@ the captured substrings are "red king", "red", and "king", and are numbered 1,
</P>
<P>
The fact that plain parentheses fulfil two functions is not always helpful.
-There are often times when grouping is required without capturing. If an
-opening parenthesis is followed by a question mark and a colon, the group
-does not do any capturing, and is not counted when computing the number of any
-subsequent capture groups. For example, if the string "the white queen"
-is matched against the pattern
+There are often times when a grouping subpattern is required without a
+capturing requirement. If an opening parenthesis is followed by a question mark
+and a colon, the subpattern does not do any capturing, and is not counted when
+computing the number of any subsequent capturing subpatterns. For example, if
+the string "the white queen" is matched against the pattern
<pre>
the ((?:red|white) (king|queen))
</pre>
the captured substrings are "white queen" and "queen", and are numbered 1 and
-2. The maximum number of capture groups is 65535.
+2. The maximum number of capturing subpatterns is 65535.
</P>
<P>
As a convenient shorthand, if any option settings are required at the start of
-a non-capturing group, the option letters may appear between the "?" and the
-":". Thus the two patterns
+a non-capturing subpattern, the option letters may appear between the "?" and
+the ":". Thus the two patterns
<pre>
(?i:saturday|sunday)
(?:(?i)saturday|sunday)
</pre>
match exactly the same set of strings. Because alternative branches are tried
-from left to right, and options are not reset until the end of the group is
-reached, an option setting in one branch does affect subsequent branches, so
+from left to right, and options are not reset until the end of the subpattern
+is reached, an option setting in one branch does affect subsequent branches, so
the above patterns match "SUNDAY" as well as "Saturday".
-<a name="dupgroupnumber"></a></P>
-<br><a name="SEC15" href="#TOC1">DUPLICATE GROUP NUMBERS</a><br>
+<a name="dupsubpatternnumber"></a></P>
+<br><a name="SEC15" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
<P>
-Perl 5.10 introduced a feature whereby each alternative in a group uses the
-same numbers for its capturing parentheses. Such a group starts with (?| and is
-itself a non-capturing group. For example, consider this pattern:
+Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
+the same numbers for its capturing parentheses. Such a subpattern starts with
+(?| and is itself a non-capturing subpattern. For example, consider this
+pattern:
<pre>
(?|(Sat)ur|(Sun))day
</pre>
@@ -1797,7 +1779,7 @@ at captured substring number one, whichever alternative matched. This construct
is useful when you want to capture part, but not all, of one of a number of
alternatives. Inside a (?| group, parentheses are numbered as usual, but the
number is reset at the start of each branch. The numbers of any capturing
-parentheses that follow the whole group start after the highest number used in
+parentheses that follow the subpattern start after the highest number used in
any branch. The following example is taken from the Perl documentation. The
numbers underneath show in which buffer the captured content will be stored.
<pre>
@@ -1805,12 +1787,13 @@ numbers underneath show in which buffer the captured content will be stored.
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
</pre>
-A backreference to a capture group uses the most recent value that is set for
-the group. The following pattern matches "abcabc" or "defdef":
+A backreference to a numbered subpattern uses the most recent value that is
+set for that number by any subpattern. The following pattern matches "abcabc"
+or "defdef":
<pre>
/(?|(abc)|(def))\1/
</pre>
-In contrast, a subroutine call to a capture group always refers to the
+In contrast, a subroutine call to a numbered subpattern always refers to the
first one in the pattern with the given number. The following pattern matches
"abcabc" or "defabc":
<pre>
@@ -1822,35 +1805,29 @@ of computing an absolute group number.
<P>
If a
<a href="#conditions">condition test</a>
-for a group's having matched refers to a non-unique number, the test is
-true if any group with that number has matched.
+for a subpattern's having matched refers to a non-unique number, the test is
+true if any of the subpatterns of that number have matched.
</P>
<P>
An alternative approach to using this "branch reset" feature is to use
-duplicate named groups, as described in the next section.
+duplicate named subpatterns, as described in the next section.
</P>
-<br><a name="SEC16" href="#TOC1">NAMED CAPTURE GROUPS</a><br>
+<br><a name="SEC16" href="#TOC1">NAMED SUBPATTERNS</a><br>
<P>
-Identifying capture groups by number is simple, but it can be very hard to keep
-track of the numbers in complicated patterns. Furthermore, if an expression is
-modified, the numbers may change. To help with this difficulty, PCRE2 supports
-the naming of capture groups. This feature was not added to Perl until release
-5.10. Python had the feature earlier, and PCRE1 introduced it at release 4.0,
-using the Python syntax. PCRE2 supports both the Perl and the Python syntax.
+Identifying capturing parentheses by number is simple, but it can be very hard
+to keep track of the numbers in complicated patterns. Furthermore, if an
+expression is modified, the numbers may change. To help with this difficulty,
+PCRE2 supports the naming of capturing subpatterns. This feature was not added
+to Perl until release 5.10. Python had the feature earlier, and PCRE1
+introduced it at release 4.0, using the Python syntax. PCRE2 supports both the
+Perl and the Python syntax.
</P>
<P>
-In PCRE2, a capture group can be named in one of three ways: (?&#60;name&#62;...) or
-(?'name'...) as in Perl, or (?P&#60;name&#62;...) as in Python. Names may be up to 32
-code units long. When PCRE2_UTF is not set, they may contain only ASCII
-alphanumeric characters and underscores, but must start with a non-digit. When
-PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode
-letter or Unicode decimal digit. In other words, group names must match one of
-these patterns:
-<pre>
- ^[_A-Za-z][_A-Za-z0-9]*\z when PCRE2_UTF is not set
- ^[_\p{L}][_\p{L}\p{Nd}]*\z when PCRE2_UTF is set
-</pre>
-References to capture groups from other parts of the pattern, such as
+In PCRE2, a capturing subpattern can be named in one of three ways:
+(?&#60;name&#62;...) or (?'name'...) as in Perl, or (?P&#60;name&#62;...) as in Python. Names
+consist of up to 32 alphanumeric characters and underscores, but must start
+with a non-digit. References to capturing parentheses from other parts of the
+pattern, such as
<a href="#backreferences">backreferences,</a>
<a href="#recursion">recursion,</a>
and
@@ -1858,18 +1835,18 @@ and
can all be made by name as well as by number.
</P>
<P>
-Named capture groups are allocated numbers as well as names, exactly as
-if the names were not present. In both PCRE2 and Perl, capture groups
+Named capturing parentheses are allocated numbers as well as names, exactly as
+if the names were not present. In both PCRE2 and Perl, capturing subpatterns
are primarily identified by numbers; any names are just aliases for these
numbers. The PCRE2 API provides function calls for extracting the complete
name-to-number translation table from a compiled pattern, as well as
convenience functions for extracting captured substrings by name.
</P>
<P>
-<b>Warning:</b> When more than one capture group has the same number, as
-described in the previous section, a name given to one of them applies to all
-of them. Perl allows identically numbered groups to have different names.
-Consider this pattern, where there are two capture groups, both numbered 1:
+<b>Warning:</b> When more than one subpattern has the same number, as described
+in the previous section, a name given to one of them applies to all of them.
+Perl allows identically numbered subpatterns to have different names. Consider
+this pattern, where there are two capturing subpatterns, both numbered 1:
<pre>
(?|(?&#60;AA&#62;aa)|(?&#60;BB&#62;bb))
</pre>
@@ -1884,21 +1861,21 @@ pattern:
<pre>
(?|(?&#60;AA&#62;aa)|(bb))
</pre>
-Although the second group number 1 is not explicitly named, the name AA is
-still an alias for any group 1. Whether the pattern matches "aa" or "bb", a
+Although the second subpattern number 1 is not explicitly named, the name AA is
+still an alias for subpattern 1. Whether the pattern matches "aa" or "bb", a
reference by name to group AA yields the matched string.
</P>
<P>
By default, a name must be unique within a pattern, except that duplicate names
-are permitted for groups with the same number, for example:
+are permitted for subpatterns with the same number, for example:
<pre>
(?|(?&#60;AA&#62;aa)|(?&#60;AA&#62;bb))
</pre>
The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
option at compile time, or by the use of (?J) within the pattern. Duplicate
-names can be useful for patterns where only one instance of the named capture
-group can match. Suppose you want to match the name of a weekday, either as a
-3-letter abbreviation or as the full name, and in both cases you want to
+names can be useful for patterns where only one instance of the named
+parentheses can match. Suppose you want to match the name of a weekday, either
+as a 3-letter abbreviation or as the full name, and in both cases you want to
extract the abbreviation. This pattern (ignoring the line breaks) does the job:
<pre>
(?&#60;DN&#62;Mon|Fri|Sun)(?:day)?|
@@ -1907,26 +1884,26 @@ extract the abbreviation. This pattern (ignoring the line breaks) does the job:
(?&#60;DN&#62;Thu)(?:rsday)?|
(?&#60;DN&#62;Sat)(?:urday)?
</pre>
-There are five capture groups, but only one is ever set after a match. The
-convenience functions for extracting the data by name returns the substring for
-the first (and in this example, the only) group of that name that matched. This
-saves searching to find which numbered group it was. (An alternative way of
-solving this problem is to use a "branch reset" group, as described in the
-previous section.)
+There are five capturing subpatterns, but only one is ever set after a match.
+The convenience functions for extracting the data by name return the substring
+for the first (and in this example, the only) subpattern of that name that
+matched. This saves searching to find which numbered subpattern it was. (An
+alternative way of solving this problem is to use a "branch reset" subpattern,
+as described in the previous section.)
</P>
<P>
-If you make a backreference to a non-unique named group from elsewhere in the
-pattern, the groups to which the name refers are checked in the order in which
-they appear in the overall pattern. The first one that is set is used for the
-reference. For example, this pattern matches both "foofoo" and "barbar" but not
-"foobar" or "barfoo":
+If you make a backreference to a non-unique named subpattern from elsewhere in
+the pattern, the subpatterns to which the name refers are checked in the order
+in which they appear in the overall pattern. The first one that is set is used
+for the reference. For example, this pattern matches both "foofoo" and
+"barbar" but not "foobar" or "barfoo":
<pre>
(?:(?&#60;n&#62;foo)|(?&#60;n&#62;bar))\k&#60;n&#62;
</PRE>
</P>
<P>
-If you make a subroutine call to a non-unique named group, the one that
+If you make a subroutine call to a non-unique named subpattern, the one that
corresponds to the first occurrence of the name is used. In the absence of
duplicate numbers this is the one with the lowest number.
</P>
@@ -1934,11 +1911,11 @@ duplicate numbers this is the one with the lowest number.
If you use a named reference in a condition
test (see the
<a href="#conditions">section about conditions</a>
-below), either to check whether a capture group has matched, or to check for
-recursion, all groups with the same name are tested. If the condition is true
-for any one of them, the overall condition is true. This is the same behaviour
-as testing by number. For further details of the interfaces for handling named
-capture groups, see the
+below), either to check whether a subpattern has matched, or to check for
+recursion, all subpatterns with the same name are tested. If the condition is
+true for any one of them, the overall condition is true. This is the same
+behaviour as testing by number. For further details of the interfaces for
+handling named subpatterns, see the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation.
</P>
@@ -1950,18 +1927,18 @@ items:
a literal data character
the dot metacharacter
the \C escape sequence
- the \R escape sequence
the \X escape sequence
+ the \R escape sequence
an escape such as \d or \pL that matches a single character
a character class
a backreference
- a parenthesized group (including most assertions)
- a subroutine call (recursive or otherwise)
+ a parenthesized subpattern (including most assertions)
+ a subroutine call to a subpattern (recursive or otherwise)
</pre>
The general repetition quantifier specifies a minimum and maximum number of
permitted matches, by giving the two numbers in curly brackets (braces),
separated by a comma. The numbers must be less than 65536, and the first must
-be less than or equal to the second. For example,
+be less than or equal to the second. For example:
<pre>
z{2,4}
</pre>
@@ -1991,12 +1968,12 @@ several code units long (and they may be of different lengths).
<P>
The quantifier {0} is permitted, causing the expression to behave as if the
previous item and the quantifier were not present. This may be useful for
-capture groups that are referenced as
-<a href="#groupsassubroutines">subroutines</a>
+subpatterns that are referenced as
+<a href="#subpatternsassubroutines">subroutines</a>
from elsewhere in the pattern (but see also the section entitled
-<a href="#subdefine">"Defining capture groups for use by reference only"</a>
-below). Except for parenthesized groups, items that have a {0} quantifier are
-omitted from the compiled pattern.
+<a href="#subdefine">"Defining subpatterns for use by reference only"</a>
+below). Items other than subpatterns that have a {0} quantifier are omitted
+from the compiled pattern.
</P>
<P>
For convenience, the three most common quantifiers have single-character
@@ -2006,23 +1983,23 @@ abbreviations:
+ is equivalent to {1,}
? is equivalent to {0,1}
</pre>
-It is possible to construct infinite loops by following a group that can match
-no characters with a quantifier that has no upper limit, for example:
+It is possible to construct infinite loops by following a subpattern that can
+match no characters with a quantifier that has no upper limit, for example:
<pre>
(a?)*
</pre>
Earlier versions of Perl and PCRE1 used to give an error at compile time for
such patterns. However, because there are cases where this can be useful, such
-patterns are now accepted, but if any repetition of the group does in fact
+patterns are now accepted, but if any repetition of the subpattern does in fact
match no characters, the loop is forcibly broken.
</P>
<P>
-By default, quantifiers are "greedy", that is, they match as much as possible
-(up to the maximum number of permitted times), without causing the rest of the
-pattern to fail. The classic example of where this gives problems is in trying
-to match comments in C programs. These appear between /* and */ and within the
-comment, individual * and / characters may appear. An attempt to match C
-comments by applying the pattern
+By default, the quantifiers are "greedy", that is, they match as much as
+possible (up to the maximum number of permitted times), without causing the
+rest of the pattern to fail. The classic example of where this gives problems
+is in trying to match comments in C programs. These appear between /* and */
+and within the comment, individual * and / characters may appear. An attempt to
+match C comments by applying the pattern
<pre>
/\*.*\*/
</pre>
@@ -2031,9 +2008,11 @@ to the string
/* first comment */ not comment /* second comment */
</pre>
fails, because it matches the entire string owing to the greediness of the .*
-item. However, if a quantifier is followed by a question mark, it ceases to be
-greedy, and instead matches the minimum number of times possible, so the
-pattern
+item.
+</P>
+<P>
+If a quantifier is followed by a question mark, it ceases to be greedy, and
+instead matches the minimum number of times possible, so the pattern
<pre>
/\*.*?\*/
</pre>
@@ -2054,7 +2033,7 @@ greedy by following them with a question mark. In other words, it inverts the
default behaviour.
</P>
<P>
-When a parenthesized group is quantified with a minimum repeat count that
+When a parenthesized subpattern is quantified with a minimum repeat count that
is greater than 1 or with a limited maximum, more memory is required for the
compiled pattern, in proportion to the size of the minimum or maximum.
</P>
@@ -2094,14 +2073,15 @@ It matches "ab" in the subject "aab". The use of the backtracking control verbs
PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly.
</P>
<P>
-When a capture group is repeated, the value captured is the substring that
-matched the final iteration. For example, after
+When a capturing subpattern is repeated, the value captured is the substring
+that matched the final iteration. For example, after
<pre>
(tweedle[dume]{3}\s*)+
</pre>
has matched "tweedledum tweedledee" the value of the captured substring is
-"tweedledee". However, if there are nested capture groups, the corresponding
-captured values may have been set in previous iterations. For example, after
+"tweedledee". However, if there are nested capturing subpatterns, the
+corresponding captured values may have been set in previous iterations. For
+example, after
<pre>
(a|(b))+
</pre>
@@ -2125,7 +2105,7 @@ After matching all 6 digits and then failing to match "foo", the normal
action of the matcher is to try again with only 5 digits matching the \d+
item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
(a term taken from Jeffrey Friedl's book) provides the means for specifying
-that once a group has matched, it is not to be re-evaluated in this way.
+that once a subpattern has matched, it is not to be re-evaluated in this way.
</P>
<P>
If we use atomic grouping for the previous example, the matcher gives up
@@ -2134,31 +2114,26 @@ special parenthesis, starting with (?&#62; as in this example:
<pre>
(?&#62;\d+)foo
</pre>
-Perl 5.28 introduced an experimental alphabetic form starting with (* which may
-be easier to remember:
-<pre>
- (*atomic:\d+)foo
-</pre>
-This kind of parenthesized group "locks up" the part of the pattern it
-contains once it has matched, and a failure further into the pattern is
-prevented from backtracking into it. Backtracking past it to previous items,
-however, works as normal.
+This kind of parenthesis "locks up" the part of the pattern it contains once
+it has matched, and a failure further into the pattern is prevented from
+backtracking into it. Backtracking past it to previous items, however, works as
+normal.
</P>
<P>
-An alternative description is that a group of this type matches exactly the
-string of characters that an identical standalone pattern would match, if
+An alternative description is that a subpattern of this type matches exactly
+the string of characters that an identical standalone pattern would match, if
anchored at the current point in the subject string.
</P>
<P>
-Atomic groups are not capture groups. Simple cases such as the above example
-can be thought of as a maximizing repeat that must swallow everything it can.
-So, while both \d+ and \d+? are prepared to adjust the number of digits they
-match in order to make the rest of the pattern match, (?&#62;\d+) can only match
-an entire sequence of digits.
+Atomic grouping subpatterns are not capturing subpatterns. Simple cases such as
+the above example can be thought of as a maximizing repeat that must swallow
+everything it can. So, while both \d+ and \d+? are prepared to adjust the
+number of digits they match in order to make the rest of the pattern match,
+(?&#62;\d+) can only match an entire sequence of digits.
</P>
<P>
Atomic groups in general can of course contain arbitrarily complicated
-expressions, and can be nested. However, when the contents of an atomic
+subpatterns, and can be nested. However, when the subpattern for an atomic
group is just a single repeated item, as in the example above, a simpler
notation, called a "possessive quantifier" can be used. This consists of an
additional + character following a quantifier. Using this notation, the
@@ -2181,8 +2156,8 @@ difference; possessive quantifiers should be slightly faster.
The possessive quantifier syntax is an extension to the Perl 5.8 syntax.
Jeffrey Friedl originated the idea (and the name) in the first edition of his
book. Mike McCloskey liked it, so implemented it when he built Sun's Java
-package, and PCRE1 copied it from there. It found its way into Perl at release
-5.10.
+package, and PCRE1 copied it from there. It ultimately found its way into Perl
+at release 5.10.
</P>
<P>
PCRE2 has an optimization that automatically "possessifies" certain simple
@@ -2192,9 +2167,10 @@ This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS).
</P>
<P>
-When a pattern contains an unlimited repeat inside a group that can itself be
-repeated an unlimited number of times, the use of an atomic group is the only
-way to avoid some failing matches taking a very long time indeed. The pattern
+When a pattern contains an unlimited repeat inside a subpattern that can itself
+be repeated an unlimited number of times, the use of an atomic group is the
+only way to avoid some failing matches taking a very long time indeed. The
+pattern
<pre>
(\D+|&#60;\d+&#62;)*[!?]
</pre>
@@ -2220,27 +2196,28 @@ sequences of non-digits cannot be broken, and failure happens quickly.
<br><a name="SEC19" href="#TOC1">BACKREFERENCES</a><br>
<P>
Outside a character class, a backslash followed by a digit greater than 0 (and
-possibly further digits) is a backreference to a capture group earlier (that
-is, to its left) in the pattern, provided there have been that many previous
-capture groups.
+possibly further digits) is a backreference to a capturing subpattern earlier
+(that is, to its left) in the pattern, provided there have been that many
+previous capturing left parentheses.
</P>
<P>
However, if the decimal number following the backslash is less than 8, it is
-always taken as a backreference, and causes an error only if there are not that
-many capture groups in the entire pattern. In other words, the group that is
-referenced need not be to the left of the reference for numbers less than 8. A
-"forward backreference" of this type can make sense when a repetition is
-involved and the group to the right has participated in an earlier iteration.
+always taken as a backreference, and causes an error only if there are not
+that many capturing left parentheses in the entire pattern. In other words, the
+parentheses that are referenced need not be to the left of the reference for
+numbers less than 8. A "forward backreference" of this type can make sense
+when a repetition is involved and the subpattern to the right has participated
+in an earlier iteration.
</P>
<P>
-It is not possible to have a numerical "forward backreference" to a group whose
-number is 8 or more using this syntax because a sequence such as \50 is
+It is not possible to have a numerical "forward backreference" to a subpattern
+whose number is 8 or more using this syntax because a sequence such as \50 is
interpreted as a character defined in octal. See the subsection entitled
"Non-printing characters"
<a href="#digitsafterbackslash">above</a>
-for further details of the handling of digits following a backslash. Other
-forms of backreferencing do not suffer from this restriction. In particular,
-there is no problem when named capture groups are used (see below).
+for further details of the handling of digits following a backslash. There is
+no such problem when named parentheses are used. A backreference to any
+subpattern is possible using named parentheses (see below).
</P>
<P>
Another way of avoiding the ambiguity inherent in the use of digits following a
@@ -2258,22 +2235,22 @@ the reference. A signed number is a relative reference. Consider this example:
<pre>
(abc(def)ghi)\g{-1}
</pre>
-The sequence \g{-1} is a reference to the most recently started capture group
-before \g, that is, is it equivalent to \2 in this example. Similarly,
-\g{-2} would be equivalent to \1. The use of relative references can be
-helpful in long patterns, and also in patterns that are created by joining
-together fragments that contain references within themselves.
+The sequence \g{-1} is a reference to the most recently started capturing
+subpattern before \g, that is, it is equivalent to \2 in this example.
+Similarly, \g{-2} would be equivalent to \1. The use of relative references
+can be helpful in long patterns, and also in patterns that are created by
+joining together fragments that contain references within themselves.
</P>
<P>
-The sequence \g{+1} is a reference to the next capture group. This kind of
-forward reference can be useful in patterns that repeat. Perl does not support
-the use of + in this way.
+The sequence \g{+1} is a reference to the next capturing subpattern. This kind
+of forward reference can be useful in patterns that repeat. Perl does not
+support the use of + in this way.
</P>
<P>
-A backreference matches whatever actually most recently matched the capture
-group in the current subject string, rather than anything at all that matches
-the group (see
-<a href="#groupsassubroutines">"Groups as subroutines"</a>
+A backreference matches whatever actually matched the capturing subpattern in
+the current subject string, rather than anything matching the subpattern
+itself (see
+<a href="#subpatternsassubroutines">"Subpatterns as subroutines"</a>
below for a way of doing that). So the pattern
<pre>
(sens|respons)e and \1ibility
@@ -2285,28 +2262,28 @@ backreference, the case of letters is relevant. For example,
((?i)rah)\s+\1
</pre>
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
-capture group is matched caselessly.
+capturing subpattern is matched caselessly.
</P>
<P>
-There are several different ways of writing backreferences to named capture
-groups. The .NET syntax \k{name} and the Perl syntax \k&#60;name&#62; or \k'name'
-are supported, as is the Python syntax (?P=name). Perl 5.10's unified
+There are several different ways of writing backreferences to named
+subpatterns. The .NET syntax \k{name} and the Perl syntax \k&#60;name&#62; or
+\k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
backreference syntax, in which \g can be used for both numeric and named
-references, is also supported. We could rewrite the above example in any of the
-following ways:
+references, is also supported. We could rewrite the above example in any of
+the following ways:
<pre>
(?&#60;p1&#62;(?i)rah)\s+\k&#60;p1&#62;
(?'p1'(?i)rah)\s+\k{p1}
(?P&#60;p1&#62;(?i)rah)\s+(?P=p1)
(?&#60;p1&#62;(?i)rah)\s+\g{p1}
</pre>
-A capture group that is referenced by name may appear in the pattern before or
+A subpattern that is referenced by name may appear in the pattern before or
after the reference.
</P>
<P>
-There may be more than one backreference to the same group. If a group has not
-actually been used in a particular match, backreferences to it always fail by
-default. For example, the pattern
+There may be more than one backreference to the same subpattern. If a
+subpattern has not actually been used in a particular match, any backreferences
+to it always fail by default. For example, the pattern
<pre>
(a|(bc))\2
</pre>
@@ -2315,11 +2292,12 @@ PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an
unset value matches an empty string.
</P>
<P>
-Because there may be many capture groups in a pattern, all digits following a
-backslash are taken as part of a potential backreference number. If the pattern
-continues with a digit character, some delimiter must be used to terminate the
-backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this
-can be white space. Otherwise, the \g{} syntax or an empty comment (see
+Because there may be many capturing parentheses in a pattern, all digits
+following a backslash are taken as part of a potential backreference number.
+If the pattern continues with a digit character, some delimiter must be used to
+terminate the backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
+option is set, this can be white space. Otherwise, the \g{} syntax or an empty
+comment (see
<a href="#comments">"Comments"</a>
below) can be used.
</P>
@@ -2327,18 +2305,19 @@ below) can be used.
Recursive backreferences
</b><br>
<P>
-A backreference that occurs inside the group to which it refers fails when the
-group is first used, so, for example, (a\1) never matches. However, such
-references can be useful inside repeated groups. For example, the pattern
+A backreference that occurs inside the parentheses to which it refers fails
+when the subpattern is first used, so, for example, (a\1) never matches.
+However, such references can be useful inside repeated subpatterns. For
+example, the pattern
<pre>
(a|b\1)+
</pre>
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
-the group, the backreference matches the character string corresponding to the
-previous iteration. In order for this to work, the pattern must be such that
-the first iteration does not need to match the backreference. This can be done
-using alternation, as in the example above, or by a quantifier with a minimum
-of zero.
+the subpattern, the backreference matches the character string corresponding
+to the previous iteration. In order for this to work, the pattern must be such
+that the first iteration does not need to match the backreference. This can be
+done using alternation, as in the example above, or by a quantifier with a
+minimum of zero.
</P>
<P>
Backreferences of this type cause the group that they reference to be treated
@@ -2355,32 +2334,26 @@ coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described
<a href="#smallassertions">above.</a>
</P>
<P>
-More complicated assertions are coded as parenthesized groups. There are two
-kinds: those that look ahead of the current position in the subject string, and
-those that look behind it, and in each case an assertion may be positive (must
-match for the assertion to be true) or negative (must not match for the
-assertion to be true). An assertion group is matched in the normal way,
-and if it is true, matching continues after it, but with the matching position
-in the subject string is was it was before the assertion was processed.
-</P>
-<P>
-A lookaround assertion may also appear as the condition in a
-<a href="#conditions">conditional group</a>
-(see below). In this case, the result of matching the assertion determines
-which branch of the condition is followed.
+More complicated assertions are coded as subpatterns. There are two kinds:
+those that look ahead of the current position in the subject string, and those
+that look behind it, and in each case an assertion may be positive (must
+succeed for matching to continue) or negative (must not succeed for matching to
+continue). An assertion subpattern is matched in the normal way, except that,
+when matching continues after a successful assertion, the matching position in
+the subject string is as it was before the assertion was processed.
</P>
<P>
-Assertion groups are not capture groups. If an assertion contains capture
-groups within it, these are counted for the purposes of numbering the capture
-groups in the whole pattern. Within each branch of an assertion, locally
-captured substrings may be referenced in the usual way. For example, a sequence
-such as (.)\g{-1} can be used to check that two adjacent characters are the
-same.
+Assertion subpatterns are not capturing subpatterns. If an assertion contains
+capturing subpatterns within it, these are counted for the purposes of
+numbering the capturing subpatterns in the whole pattern. Within each branch of
+an assertion, locally captured substrings may be referenced in the usual way.
+For example, a sequence such as (.)\g{-1} can be used to check that two
+adjacent characters are the same.
</P>
<P>
When a branch within an assertion fails to match, any substrings that were
captured are discarded (as happens with any pattern branch that fails to
-match). A negative assertion is true only when all its branches fail to match;
+match). A negative assertion succeeds only when all its branches fail to match;
this means that no captured substrings are ever retained after a successful
negative assertion. When an assertion contains a matching branch, what happens
depends on the type of assertion.
@@ -2389,25 +2362,25 @@ depends on the type of assertion.
For a positive assertion, internally captured substrings in the successful
branch are retained, and matching continues with the next pattern item after
the assertion. For a negative assertion, a matching branch means that the
-assertion is not true. If such an assertion is being used as a condition in a
-<a href="#conditions">conditional group</a>
+assertion has failed. If the assertion is being used as a condition in a
+<a href="#conditions">conditional subpattern</a>
(see below), captured substrings are retained, because matching continues with
the "no" branch of the condition. For other failing negative assertions,
control passes to the previous backtracking point, thus discarding any captured
strings within the assertion.
</P>
<P>
-For compatibility with Perl, most assertion groups may be repeated; though it
-makes no sense to assert the same thing several times, the side effect of
-capturing may occasionally be useful. However, an assertion that forms the
-condition for a conditional group may not be quantified. In practice, for
-other assertions, there only three cases:
+For compatibility with Perl, most assertion subpatterns may be repeated; though
+it makes no sense to assert the same thing several times, the side effect of
+capturing parentheses may occasionally be useful. However, an assertion that
+forms the condition for a conditional subpattern may not be quantified. In
+practice, for other assertions, there are only three cases:
<br>
<br>
(1) If the quantifier is {0}, the assertion is never obeyed during matching.
-However, it may contain internal capture groups that are called from elsewhere
-via the
-<a href="#groupsassubroutines">subroutine mechanism.</a>
+However, it may contain internal capturing parenthesized groups that are called
+from elsewhere via the
+<a href="#subpatternsassubroutines">subroutine mechanism.</a>
<br>
<br>
 (2) If the quantifier is {0,n} where n is greater than zero, it is treated as if it
@@ -2419,25 +2392,6 @@ without the assertion, the order depending on the greediness of the quantifier.
The assertion is obeyed just once when encountered during matching.
</P>
<br><b>
-Alphabetic assertion names
-</b><br>
-<P>
-Traditionally, symbolic sequences such as (?= and (?&#60;= have been used to specify
-lookaround assertions. Perl 5.28 introduced some experimental alphabetic
-alternatives which might be easier to remember. They all start with (* instead
-of (? and must be written using lower case letters. PCRE2 supports the
-following synonyms:
-<pre>
- (*positive_lookahead: or (*pla: is the same as (?=
- (*negative_lookahead: or (*nla: is the same as (?!
- (*positive_lookbehind: or (*plb: is the same as (?&#60;=
- (*negative_lookbehind: or (*nlb: is the same as (?&#60;!
-</pre>
-For example, (*pla:foo) is the same assertion as (?=foo). In the following
-sections, the various assertions are described using the original symbolic
-forms.
-</P>
-<br><b>
Lookahead assertions
</b><br>
<P>
@@ -2519,9 +2473,9 @@ because it makes it impossible to calculate the length of the lookbehind. The
permitted in lookbehinds.
</P>
<P>
-<a href="#groupsassubroutines">"Subroutine"</a>
+<a href="#subpatternsassubroutines">"Subroutine"</a>
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
-as the called capture group matches a fixed-length string. However,
+as the subpattern matches a fixed-length string. However,
<a href="#recursion">recursion,</a>
that is, a "subroutine" call into a group that is already active,
is not supported.
@@ -2530,10 +2484,10 @@ is not supported.
Perl does not support backreferences in lookbehinds. PCRE2 does support them,
but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option
must not be set, there must be no use of (?| in the pattern (it creates
-duplicate group numbers), and if the backreference is by name, the name
-must be unique. Of course, the referenced group must itself match a fixed
-length substring. The following pattern matches words containing at least two
-characters that begin and end with the same character:
+duplicate subpattern numbers), and if the backreference is by name, the name
+must be unique. Of course, the referenced subpattern must itself be of fixed
+length. The following pattern matches words containing at least two characters
+that begin and end with the same character:
<pre>
\b(\w)\w++(?&#60;=\1)
</PRE>
@@ -2599,75 +2553,13 @@ preceded by "foo", while
</pre>
is another pattern that matches "foo" preceded by three digits and any three
characters that are not "999".
-</P>
-<br><a name="SEC21" href="#TOC1">SCRIPT RUNS</a><br>
-<P>
-In concept, a script run is a sequence of characters that are all from the same
-Unicode script such as Latin or Greek. However, because some scripts are
-commonly used together, and because some diacritical and other marks are used
-with multiple scripts, it is not that simple. There is a full description of
-the rules that PCRE2 uses in the section entitled
-<a href="pcre2unicode.html#scriptruns">"Script Runs"</a>
-in the
-<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
-documentation.
-</P>
-<P>
-If part of a pattern is enclosed between (*script_run: or (*sr: and a closing
-parenthesis, it fails if the sequence of characters that it matches are not a
-script run. After a failure, normal backtracking occurs. Script runs can be
-used to detect spoofing attacks using characters that look the same, but are
-from different scripts. The string "paypal.com" is an infamous example, where
-the letters could be a mixture of Latin and Cyrillic. This pattern ensures that
-the matched characters in a sequence of non-spaces that follow white space are
-a script run:
-<pre>
- \s+(*sr:\S+)
-</pre>
-To be sure that they are all from the Latin script (for example), a lookahead
-can be used:
-<pre>
- \s+(?=\p{Latin})(*sr:\S+)
-</pre>
-This works as long as the first character is expected to be a character in that
-script, and not (for example) punctuation, which is allowed with any script. If
-this is not the case, a more creative lookahead is needed. For example, if
-digits, underscore, and dots are permitted at the start:
-<pre>
- \s+(?=[0-9_.]*\p{Latin})(*sr:\S+)
-
-</PRE>
-</P>
-<P>
-In many cases, backtracking into a script run pattern fragment is not
-desirable. The script run can employ an atomic group to prevent this. Because
-this is a common requirement, a shorthand notation is provided by
-(*atomic_script_run: or (*asr:
-<pre>
- (*asr:...) is the same as (*sr:(?&#62;...))
-</pre>
-Note that the atomic group is inside the script run. Putting it outside would
-not prevent backtracking into the script run pattern.
-</P>
-<P>
-Support for script runs is not available if PCRE2 is compiled without Unicode
-support. A compile-time error is given if any of the above constructs is
-encountered. Script runs are not supported by the alternate matching function,
-<b>pcre2_dfa_match()</b> because they use the same mechanism as capturing
-parentheses.
-</P>
-<P>
-<b>Warning:</b> The (*ACCEPT) control verb
-<a href="#acceptverb">(see below)</a>
-should not be used within a script run group, because it causes an immediate
-exit from the group, bypassing the script run checking.
<a name="conditions"></a></P>
-<br><a name="SEC22" href="#TOC1">CONDITIONAL GROUPS</a><br>
+<br><a name="SEC21" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
<P>
-It is possible to cause the matching process to obey a pattern fragment
-conditionally or to choose between two alternative fragments, depending on
-the result of an assertion, or whether a specific capture group has
-already been matched. The two possible forms of conditional group are:
+It is possible to cause the matching process to obey a subpattern
+conditionally or to choose between two alternative subpatterns, depending on
+the result of an assertion, or whether a specific capturing subpattern has
+already been matched. The two possible forms of conditional subpattern are:
<pre>
(?(condition)yes-pattern)
(?(condition)yes-pattern|no-pattern)
@@ -2675,34 +2567,36 @@ already been matched. The two possible forms of conditional group are:
If the condition is satisfied, the yes-pattern is used; otherwise the
no-pattern (if present) is used. An absent no-pattern is equivalent to an empty
string (it always matches). If there are more than two alternatives in the
-group, a compile-time error occurs. Each of the two alternatives may itself
-contain nested groups of any form, including conditional groups; the
-restriction to two alternatives applies only at the level of the condition
-itself. This pattern fragment is an example where the alternatives are complex:
+subpattern, a compile-time error occurs. Each of the two alternatives may
+itself contain nested subpatterns of any form, including conditional
+subpatterns; the restriction to two alternatives applies only at the level of
+the condition. This pattern fragment is an example where the alternatives are
+complex:
<pre>
(?(1) (A|B|C) | (D | (?(2)E|F) | E) )
</PRE>
</P>
<P>
-There are five kinds of condition: references to capture groups, references to
+There are five kinds of condition: references to subpatterns, references to
recursion, two pseudo-conditions called DEFINE and VERSION, and assertions.
</P>
<br><b>
-Checking for a used capture group by number
+Checking for a used subpattern by number
</b><br>
<P>
If the text between the parentheses consists of a sequence of digits, the
-condition is true if a capture group of that number has previously matched. If
-there is more than one capture group with the same number (see the earlier
-<a href="#recursion">section about duplicate group numbers),</a>
+condition is true if a capturing subpattern of that number has previously
+matched. If there is more than one capturing subpattern with the same number
+(see the earlier
+<a href="#dupsubpatternnumber">section about duplicate subpattern numbers),</a>
the condition is true if any of them have matched. An alternative notation is
-to precede the digits with a plus or minus sign. In this case, the group number
-is relative rather than absolute. The most recently opened capture group can be
-referenced by (?(-1), the next most recent by (?(-2), and so on. Inside loops
-it can also make sense to refer to subsequent groups. The next capture group
-can be referenced as (?(+1), and so on. (The value zero in any of these forms
-is not used; it provokes a compile-time error.)
+to precede the digits with a plus or minus sign. In this case, the subpattern
+number is relative rather than absolute. The most recently opened parentheses
+can be referenced by (?(-1), the next most recent by (?(-2), and so on. Inside
+loops it can also make sense to refer to subsequent groups. The next
+parentheses to be opened can be referenced as (?(+1), and so on. (The value
+zero in any of these forms is not used; it provokes a compile-time error.)
</P>
<P>
Consider the following pattern, which contains non-significant white space to
@@ -2714,12 +2608,12 @@ three parts for ease of discussion:
The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The second part
matches one or more characters that are not parentheses. The third part is a
-conditional group that tests whether or not the first capture group
-matched. If it did, that is, if subject started with an opening parenthesis,
+conditional subpattern that tests whether or not the first set of parentheses
+matched. If they did, that is, if the subject started with an opening parenthesis,
the condition is true, and so the yes-pattern is executed and a closing
parenthesis is required. Otherwise, since no-pattern is not present, the
-conditional group matches nothing. In other words, this pattern matches a
-sequence of non-parentheses, optionally enclosed in parentheses.
+subpattern matches nothing. In other words, this pattern matches a sequence of
+non-parentheses, optionally enclosed in parentheses.
</P>
<P>
If you were embedding this pattern in a larger one, you could use a relative
@@ -2730,20 +2624,22 @@ reference:
This makes the fragment independent of the parentheses in the larger pattern.
</P>
<br><b>
-Checking for a used capture group by name
+Checking for a used subpattern by name
</b><br>
<P>
Perl uses the syntax (?(&#60;name&#62;)...) or (?('name')...) to test for a used
-capture group by name. For compatibility with earlier versions of PCRE1, which
-had this facility before Perl, the syntax (?(name)...) is also recognized.
-Note, however, that undelimited names consisting of the letter R followed by
-digits are ambiguous (see the following section). Rewriting the above example
-to use a named group gives this:
+subpattern by name. For compatibility with earlier versions of PCRE1, which had
+this facility before Perl, the syntax (?(name)...) is also recognized. Note,
+however, that undelimited names consisting of the letter R followed by digits
+are ambiguous (see the following section).
+</P>
+<P>
+Rewriting the above example to use a named subpattern gives this:
<pre>
(?&#60;OPEN&#62; \( )? [^()]+ (?(&#60;OPEN&#62;) \) )
</pre>
If the name used in a condition of this kind is a duplicate, the test is
-applied to all groups of the same name, and is true if any one of them has
+applied to all subpatterns of the same name, and is true if any one of them has
matched.
</P>
<br><b>
@@ -2755,20 +2651,20 @@ the pattern to another, whether or not it is actually recursive. See the
sections entitled
<a href="#recursion">"Recursive patterns"</a>
and
-<a href="#groupsassubroutines">"Groups as subroutines"</a>
-below for details of recursion and subroutine calls.
+<a href="#subpatternsassubroutines">"Subpatterns as subroutines"</a>
+below for details of recursion and subpattern calls.
</P>
<P>
-If a condition is the string (R), and there is no capture group with the name
-R, the condition is true if matching is currently in a recursion or subroutine
-call to the whole pattern or any capture group. If digits follow the letter R,
-and there is no group with that name, the condition is true if the most recent
-call is into a group with the given number, which must exist somewhere in the
-overall pattern. This is a contrived example that is equivalent to a+b:
+If a condition is the string (R), and there is no subpattern with the name R,
+the condition is true if matching is currently in a recursion or subroutine
+call to the whole pattern or any subpattern. If digits follow the letter R, and
+there is no subpattern with that name, the condition is true if the most recent
+call is into a subpattern with the given number, which must exist somewhere in
+the overall pattern. This is a contrived example that is equivalent to a+b:
<pre>
((?(R1)a+|(?1)b))
</pre>
-However, in both cases, if there is a capture group with a matching name, the
+However, in both cases, if there is a subpattern with a matching name, the
condition tests for its being set, as described in the section above, instead
of testing for recursion. For example, creating a group with the name R1 by
adding (?&#60;R1&#62;) to the above pattern completely changes its meaning.
@@ -2778,28 +2674,28 @@ If a name preceded by ampersand follows the letter R, for example:
<pre>
(?(R&name)...)
</pre>
-the condition is true if the most recent recursion is into a group of that name
-(which must exist within the pattern).
+the condition is true if the most recent recursion is into a subpattern of that
+name (which must exist within the pattern).
</P>
<P>
This condition does not check the entire recursion stack. It tests only the
current level. If the name used in a condition of this kind is a duplicate, the
-test is applied to all groups of the same name, and is true if any one of
+test is applied to all subpatterns of the same name, and is true if any one of
them is the most recent recursion.
</P>
<P>
At "top level", all these recursion test conditions are false.
<a name="subdefine"></a></P>
<br><b>
-Defining capture groups for use by reference only
+Defining subpatterns for use by reference only
</b><br>
<P>
If the condition is the string (DEFINE), the condition is always false, even if
there is a group with the name DEFINE. In this case, there may be only one
-alternative in the rest of the conditional group. It is always skipped if
-control reaches this point in the pattern; the idea of DEFINE is that it can be
-used to define subroutines that can be referenced from elsewhere. (The use of
-<a href="#groupsassubroutines">subroutines</a>
+alternative in the subpattern. It is always skipped if control reaches this
+point in the pattern; the idea of DEFINE is that it can be used to define
+subroutines that can be referenced from elsewhere. (The use of
+<a href="#subpatternsassubroutines">subroutines</a>
is described below.) For example, a pattern to match an IPv4 address such as
"192.168.23.245" could be written like this (ignore white space and line
breaks):
@@ -2836,10 +2732,10 @@ than two digits.
Assertion conditions
</b><br>
<P>
-If the condition is not in any of the above formats, it must be a parenthesized
-assertion. This may be a positive or negative lookahead or lookbehind
-assertion. Consider this pattern, again containing non-significant white space,
-and with the two alternatives on the second line:
+If the condition is not in any of the above formats, it must be an assertion.
+This may be a positive or negative lookahead or lookbehind assertion. Consider
+this pattern, again containing non-significant white space, and with the two
+alternatives on the second line:
<pre>
(?(?=[^a-z]*[a-z])
\d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )
@@ -2852,18 +2748,18 @@ against the second. This pattern matches strings in one of the two forms
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
</P>
<P>
-When an assertion that is a condition contains capture groups, any
+When an assertion that is a condition contains capturing subpatterns, any
capturing that occurs in a matching branch is retained afterwards, for both
positive and negative assertions, because matching always continues after the
assertion, whether it succeeds or fails. (Compare non-conditional assertions,
-for which captures are retained only for positive assertions that succeed.)
+when captures are retained only for positive assertions that succeed.)
<a name="comments"></a></P>
-<br><a name="SEC23" href="#TOC1">COMMENTS</a><br>
+<br><a name="SEC22" href="#TOC1">COMMENTS</a><br>
<P>
There are two ways of including comments in patterns that are processed by
PCRE2. In both cases, the start of the comment must not be in a character
class, nor in the middle of any other sequence of related characters such as
-(?: or a group name or number. The characters that make up a comment play
+(?: or a subpattern name or number. The characters that make up a comment play
no part in the pattern matching.
</P>
<P>
@@ -2888,7 +2784,7 @@ a newline in the pattern. The sequence \n is still literal at this stage, so
it does not terminate the comment. Only an actual character with the code value
0x0a (the default newline) does so.
<a name="recursion"></a></P>
-<br><a name="SEC24" href="#TOC1">RECURSIVE PATTERNS</a><br>
+<br><a name="SEC23" href="#TOC1">RECURSIVE PATTERNS</a><br>
<P>
Consider the problem of matching a string in parentheses, allowing for
unlimited nested parentheses. Without the use of recursion, the best that can
@@ -2910,14 +2806,14 @@ recursively to the pattern in which it appears.
<P>
Obviously, PCRE2 cannot support the interpolation of Perl code. Instead, it
supports special syntax for recursion of the entire pattern, and also for
-individual capture group recursion. After its introduction in PCRE1 and Python,
+individual subpattern recursion. After its introduction in PCRE1 and Python,
this kind of recursion was subsequently introduced into Perl at release 5.10.
</P>
<P>
A special item that consists of (? followed by a number greater than zero and a
-closing parenthesis is a recursive subroutine call of the capture group of the
-given number, provided that it occurs inside that group. (If not, it is a
-<a href="#groupsassubroutines">non-recursive subroutine</a>
+closing parenthesis is a recursive subroutine call of the subpattern of the
+given number, provided that it occurs inside that subpattern. (If not, it is a
+<a href="#subpatternsassubroutines">non-recursive subroutine</a>
call, which is described in the next section.) The special item (?R) or (?0) is
a recursive call of the entire regular expression.
</P>
@@ -2951,13 +2847,13 @@ capturing parentheses leftwards from the point at which it is encountered.
</P>
<P>
Be aware however, that if
-<a href="#dupgroupnumber">duplicate capture group numbers</a>
-are in use, relative references refer to the earliest group with the
+<a href="#dupsubpatternnumber">duplicate subpattern numbers</a>
+are in use, relative references refer to the earliest subpattern with the
appropriate number. Consider, for example:
<pre>
(?|(a)|(b)) (c) (?-2)
</pre>
-The first two capture groups (a) and (b) are both numbered 1, and group (c)
+The first two capturing groups (a) and (b) are both numbered 1, and group (c)
is number 2. When the reference (?-2) is encountered, the second most recently
opened parentheses has the number 1, but it is the first such group (the (a)
group) to which the recursion refers. This would be the same if an absolute
@@ -2965,10 +2861,10 @@ reference (?1) was used. In other words, relative references are just a
shorthand for computing a group number.
</P>
<P>
-It is also possible to refer to subsequent capture groups, by writing
+It is also possible to refer to subsequently opened parentheses, by writing
references such as (?+2). However, these cannot be recursive because the
reference is not inside the parentheses that are referenced. They are always
-<a href="#groupsassubroutines">non-recursive subroutine</a>
+<a href="#subpatternsassubroutines">non-recursive subroutine</a>
calls, as described in the next section.
</P>
<P>
@@ -2978,7 +2874,7 @@ rewrite the above example as follows:
<pre>
(?&#60;pn&#62; \( ( [^()]++ | (?&pn) )* \) )
</pre>
-If there is more than one group with the same name, the earliest one is
+If there is more than one subpattern with the same name, the earliest one is
used.
</P>
<P>
@@ -3004,9 +2900,9 @@ documentation). If the pattern above is matched against
(ab(cd)ef)
</pre>
the value for the inner capturing parentheses (numbered 2) is "ef", which is
-the last value taken on at the top level. If a capture group is not matched at
-the top level, its final captured value is unset, even if it was (temporarily)
-set at a deeper level during the matching process.
+the last value taken on at the top level. If a capturing subpattern is not
+matched at the top level, its final captured value is unset, even if it was
+(temporarily) set at a deeper level during the matching process.
</P>
<P>
Do not confuse the (?R) item with the condition (R), which tests for recursion.
@@ -3016,9 +2912,9 @@ recursing), whereas any characters are permitted at the outer level.
<pre>
&#60; (?: (?(R) \d++ | [^&#60;&#62;]*+) | (?R)) * &#62;
</pre>
-In this pattern, (?(R) is the start of a conditional group, with two different
-alternatives for the recursive and non-recursive cases. The (?R) item is the
-actual recursive call.
+In this pattern, (?(R) is the start of a conditional subpattern, with two
+different alternatives for the recursive and non-recursive cases. The (?R) item
+is the actual recursive call.
<a name="recursiondifference"></a></P>
<br><b>
Differences in recursion processing between PCRE2 and Perl
@@ -3028,7 +2924,7 @@ Some former differences between PCRE2 and Perl no longer exist.
</P>
<P>
Before release 10.30, recursion processing in PCRE2 differed from Perl in that
-a recursive subroutine call was always treated as an atomic group. That is,
+a recursive subpattern call was always treated as an atomic group. That is,
once it had matched some of the subject string, it was never re-entered, even
if it contained untried alternatives and there was a subsequent matching
failure. (Historical note: PCRE implemented recursion before Perl did.)
@@ -3064,7 +2960,7 @@ Perl takes so long that you think it has gone into a loop.
<P>
Another way in which PCRE2 and Perl used to differ in their recursion
processing is in the handling of captured values. Formerly in Perl, when a
-group was called recursively or as a subroutine (see the next section), it
+subpattern was called recursively or as a subroutine (see the next section), it
had no access to any values that were captured outside the recursion, whereas
in PCRE2 these values can be referenced. Consider this pattern:
<pre>
@@ -3075,15 +2971,16 @@ the second group, when the backreference \1 fails to match "b", the second
alternative matches "a" and then recurses. In the recursion, \1 does now match
"b" and so the whole match succeeds. This match used to fail in Perl, but in
later versions (I tried 5.024) it now works.
-<a name="groupsassubroutines"></a></P>
-<br><a name="SEC25" href="#TOC1">GROUPS AS SUBROUTINES</a><br>
+<a name="subpatternsassubroutines"></a></P>
+<br><a name="SEC24" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
<P>
-If the syntax for a recursive group call (either by number or by name) is used
-outside the parentheses to which it refers, it operates a bit like a subroutine
-in a programming language. More accurately, PCRE2 treats the referenced group
-as an independent subpattern which it tries to match at the current matching
-position. The called group may be defined before or after the reference. A
-numbered reference can be absolute or relative, as in these examples:
+If the syntax for a recursive subpattern call (either by number or by
+name) is used outside the parentheses to which it refers, it operates a bit
+like a subroutine in a programming language. More accurately, PCRE2 treats the
+referenced subpattern as an independent subpattern which it tries to match at
+the current matching position. The called subpattern may be defined before or
+after the reference. A numbered reference can be absolute or relative, as in
+these examples:
<pre>
(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
@@ -3108,28 +3005,28 @@ occur. However, any capturing parentheses that are set during the subroutine
call revert to their previous values afterwards.
</P>
<P>
-Processing options such as case-independence are fixed when a group is
+Processing options such as case-independence are fixed when a subpattern is
defined, so if it is used as a subroutine, such options cannot be changed for
different calls. For example, consider this pattern:
<pre>
(abc)(?i:(?-1))
</pre>
It matches "abcabc". It does not match "abcABC" because the change of
-processing option does not affect the called group.
+processing option does not affect the called subpattern.
</P>
<P>
The behaviour of
<a href="#backtrackcontrol">backtracking control verbs</a>
-in groups when called as subroutines is described in the section entitled
+in subpatterns when called as subroutines is described in the section entitled
<a href="#btsub">"Backtracking verbs in subroutines"</a>
below.
<a name="onigurumasubroutines"></a></P>
-<br><a name="SEC26" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
+<br><a name="SEC25" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
<P>
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
-syntax for calling a group as a subroutine, possibly recursively. Here are two
-of the examples used above, rewritten using this syntax:
+syntax for referencing a subpattern as a subroutine, possibly recursively. Here
+are two of the examples used above, rewritten using this syntax:
<pre>
(?&#60;pn&#62; \( ( (?&#62;[^()]+) | \g&#60;pn&#62; )* \) )
(sens|respons)e and \g'1'ibility
@@ -3142,7 +3039,7 @@ plus or a minus sign it is taken as a relative reference. For example:
Note that \g{...} (Perl syntax) and \g&#60;...&#62; (Oniguruma syntax) are <i>not</i>
synonymous. The former is a backreference; the latter is a subroutine call.
</P>
-<br><a name="SEC27" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
<P>
Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
code to be obeyed in the middle of matching a regular expression. This makes it
@@ -3218,13 +3115,12 @@ example:
</pre>
The doubling is removed before the string is passed to the callout function.
<a name="backtrackcontrol"></a></P>
-<br><a name="SEC28" href="#TOC1">BACKTRACKING CONTROL</a><br>
+<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
There are a number of special "Backtracking Control Verbs" (to use Perl's
terminology) that modify the behaviour of backtracking during matching. They
are generally of the form (*VERB) or (*VERB:NAME). Some verbs take either form,
possibly behaving differently depending on whether or not a name is present.
-The names are not required to be unique within the pattern.
</P>
<P>
By default, for compatibility with Perl, a name is any sequence of characters
@@ -3266,7 +3162,7 @@ The behaviour of these verbs in
<a href="#btrepeat">repeated groups,</a>
<a href="#btassert">assertions,</a>
and in
-<a href="#btsub">capture groups called as subroutines</a>
+<a href="#btsub">subpatterns called as subroutines</a>
(whether or not recursively) is documented below.
<a name="nooptimize"></a></P>
<br><b>
@@ -3290,7 +3186,7 @@ documentation.
<P>
Experiments with Perl suggest that it too has similar optimizations, and like
PCRE2, turning them off can change the result of a match.
-<a name="acceptverb"></a></P>
+</P>
<br><b>
Verbs that act immediately
</b><br>
@@ -3300,8 +3196,8 @@ The following verbs act as soon as they are encountered.
(*ACCEPT) or (*ACCEPT:NAME)
</pre>
This verb causes the match to end successfully, skipping the remainder of the
-pattern. However, when it is inside a capture group that is called as a
-subroutine, only that group is ended successfully. Matching then continues
+pattern. However, when it is inside a subpattern that is called as a
+subroutine, only that subpattern is ended successfully. Matching then continues
at the outer level. If (*ACCEPT) is triggered in a positive assertion, the
assertion succeeds; in a negative assertion, the assertion fails.
</P>
@@ -3313,10 +3209,6 @@ example:
</pre>
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
the outer parentheses.
-</P>
-<P>
-<b>Warning:</b> (*ACCEPT) should not be used within a script run group, because
-it causes an immediate exit from the group, bypassing the script run checking.
<pre>
(*FAIL) or (*FAIL:NAME)
</pre>
@@ -3332,8 +3224,8 @@ A match with the string "aaaa" always fails, but the callout is taken before
each backtrack happens (in this example, 10 times).
</P>
<P>
-(*ACCEPT:NAME) and (*FAIL:NAME) are treated as (*MARK:NAME)(*ACCEPT) and
-(*MARK:NAME)(*FAIL), respectively.
+(*ACCEPT:NAME) and (*FAIL:NAME) behave exactly the same as
+(*MARK:NAME)(*ACCEPT) and (*MARK:NAME)(*FAIL), respectively.
</P>
<br><b>
Recording which path was taken
@@ -3345,25 +3237,27 @@ starting point (see (*SKIP) below).
<pre>
(*MARK:NAME) or (*:NAME)
</pre>
-A name is always required with this verb. For all the other backtracking
-control verbs, a NAME argument is optional.
+A name is always required with this verb. There may be as many instances of
+(*MARK) as you like in a pattern, and their names do not have to be unique.
</P>
<P>
-When a match succeeds, the name of the last-encountered mark name on the
+When a match succeeds, the name of the last-encountered (*MARK:NAME) on the
matching path is passed back to the caller as described in the section entitled
<a href="pcre2api.html#matchotherdata">"Other information about the match"</a>
in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-documentation. This applies to all instances of (*MARK) and other verbs,
-including those inside assertions and atomic groups. However, there are
-differences in those cases when (*MARK) is used in conjunction with (*SKIP) as
-described below.
+documentation. This applies to all instances of (*MARK), including those inside
+assertions and atomic groups. (There are differences in those cases when
+(*MARK) is used in conjunction with (*SKIP) as described below.)
+</P>
+<P>
+As well as (*MARK), the (*COMMIT), (*PRUNE) and (*THEN) verbs may have
+associated NAME arguments. Whichever is last on the matching path is passed
+back. See below for more details of these other verbs.
</P>
<P>
-The mark name that was last encountered on the matching path is passed back. A
-verb without a NAME argument is ignored for this purpose. Here is an example of
-<b>pcre2test</b> output, where the "mark" modifier requests the retrieval and
-outputting of (*MARK) data:
+Here is an example of <b>pcre2test</b> output, where the "mark" modifier
+requests the retrieval and outputting of (*MARK) data:
<pre>
re&#62; /X(*MARK:A)Y|X(*MARK:B)Z/mark
data&#62; XY
@@ -3413,7 +3307,7 @@ to the left of the verb. However, when one of these verbs appears inside an
atomic group or in a lookaround assertion that is true, its effect is confined
to that group, because once the group has been matched, there is never any
backtracking into it. Backtracking from beyond an assertion or an atomic group
-ignores the entire group, and seeks a preceding backtracking point.
+ignores the entire group, and seeks a preceding backtracking point.
</P>
<P>
These verbs differ in exactly what kind of failure occurs when backtracking
@@ -3438,8 +3332,8 @@ dynamic anchor, or "I've started, so I must finish."
<P>
The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
-caller. However, (*SKIP:NAME) searches only for names that are set with
-(*MARK), ignoring those set by any of the other backtracking verbs.
+caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
</P>
<P>
If there is more than one backtracking verb in a pattern, a different one that
@@ -3483,7 +3377,7 @@ as (*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by other backtracking verbs.
+ignoring those set by (*COMMIT), (*PRUNE) or (*THEN).
<pre>
(*SKIP)
</pre>
@@ -3538,7 +3432,7 @@ the second branch of the pattern.
</P>
<P>
Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores
-names that are set by other backtracking verbs.
+names that are set by (*COMMIT:NAME), (*PRUNE:NAME) or (*THEN:NAME).
<pre>
(*THEN) or (*THEN:NAME)
</pre>
@@ -3560,32 +3454,34 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by other backtracking verbs.
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
</P>
<P>
-A group that does not contain a | character is just a part of the enclosing
-alternative; it is not a nested alternation with only one alternative. The
-effect of (*THEN) extends beyond such a group to the enclosing alternative.
-Consider this pattern, where A, B, etc. are complex pattern fragments that do
-not contain any | characters at this level:
+A subpattern that does not contain a | character is just a part of the
+enclosing alternative; it is not a nested alternation with only one
+alternative. The effect of (*THEN) extends beyond such a subpattern to the
+enclosing alternative. Consider this pattern, where A, B, etc. are complex
+pattern fragments that do not contain any | characters at this level:
<pre>
A (B(*THEN)C) | D
</pre>
If A and B are matched, but there is a failure in C, matching does not
backtrack into A; instead it moves to the next alternative, that is, D.
-However, if the group containing (*THEN) is given an alternative, it
+However, if the subpattern containing (*THEN) is given an alternative, it
behaves differently:
<pre>
A (B(*THEN)C | (*FAIL)) | D
</pre>
-The effect of (*THEN) is now confined to the inner group. After a failure in C,
-matching moves to (*FAIL), which causes the whole group to fail because there
-are no more alternatives to try. In this case, matching does backtrack into A.
+The effect of (*THEN) is now confined to the inner subpattern. After a failure
+in C, matching moves to (*FAIL), which causes the whole subpattern to fail
+because there are no more alternatives to try. In this case, matching does now
+backtrack into A.
</P>
<P>
-Note that a conditional group is not considered as having two alternatives,
-because only one is ever used. In other words, the | character in a conditional
-group has a different meaning. Ignoring white space, consider:
+Note that a conditional subpattern is not considered as having two
+alternatives, because only one is ever used. In other words, the | character in
+a conditional subpattern has a different meaning. Ignoring white space,
+consider:
<pre>
^.*? (?(?=a) a | b(*THEN)c )
</pre>
@@ -3593,7 +3489,7 @@ If the subject is "ba", this pattern does not match. Because .*? is ungreedy,
it initially matches zero characters. The condition (?=a) then fails, the
character "b" is matched, but "c" is not. At this point, matching does not
backtrack to .*? as might perhaps be expected from the presence of the |
-character. The conditional group is part of the single alternative that
+character. The conditional subpattern is part of the single alternative that
comprises the whole pattern, and so the match fails. (If there was a backtrack
into .*?, allowing it to match "b", the match would succeed.)
</P>
@@ -3649,14 +3545,14 @@ Backtracking verbs in assertions
(*FAIL) in any assertion has its normal effect: it forces an immediate
backtrack. The behaviour of the other backtracking verbs depends on whether or
not the assertion is standalone or acting as the condition in a conditional
-group.
+subpattern.
</P>
<P>
(*ACCEPT) in a standalone positive assertion causes the assertion to succeed
-without any further processing; captured strings and a mark name (if set) are
-retained. In a standalone negative assertion, (*ACCEPT) causes the assertion to
-fail without any further processing; captured substrings and any mark name are
-discarded.
+without any further processing; captured strings and a (*MARK) name (if set)
+are retained. In a standalone negative assertion, (*ACCEPT) causes the
+assertion to fail without any further processing; captured substrings and any
+(*MARK) name are discarded.
</P>
<P>
If the assertion is a condition, (*ACCEPT) causes the condition to be true for
@@ -3688,35 +3584,36 @@ the assertion to be true, without considering any further alternative branches.
Backtracking verbs in subroutines
</b><br>
<P>
-These behaviours occur whether or not the group is called recursively.
+These behaviours occur whether or not the subpattern is called recursively.
</P>
<P>
-(*ACCEPT) in a group called as a subroutine causes the subroutine match to
+(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to
succeed without any further processing. Matching then continues after the
subroutine call. Perl documents this behaviour. Perl's treatment of the other
verbs in subroutines is different in some cases.
</P>
<P>
-(*FAIL) in a group called as a subroutine has its normal effect: it forces
+(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
an immediate backtrack.
</P>
<P>
(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when
-triggered by being backtracked to in a group called as a subroutine. There is
-then a backtrack at the outer level.
+triggered by being backtracked to in a subpattern called as a subroutine. There
+is then a backtrack at the outer level.
</P>
<P>
(*THEN), when triggered, skips to the next alternative in the innermost
-enclosing group that has alternatives (its normal behaviour). However, if there
-is no such group within the subroutine's group, the subroutine match fails and
-there is a backtrack at the outer level.
+enclosing group within the subpattern that has alternatives (its normal
+behaviour). However, if there is no such group within the subroutine
+subpattern, the subroutine match fails and there is a backtrack at the outer
+level.
</P>
-<br><a name="SEC29" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC28" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2api</b>(3), <b>pcre2callout</b>(3), <b>pcre2matching</b>(3),
<b>pcre2syntax</b>(3), <b>pcre2</b>(3).
</P>
-<br><a name="SEC30" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC29" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -3725,11 +3622,11 @@ University Computing Service
Cambridge, England.
<br>
</P>
-<br><a name="SEC31" href="#TOC1">REVISION</a><br>
+<br><a name="SEC30" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 12 February 2019
+Last updated: 04 September 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2perform.html b/dist2/doc/html/pcre2perform.html
index 80d716c7..f823c12f 100644
--- a/dist2/doc/html/pcre2perform.html
+++ b/dist2/doc/html/pcre2perform.html
@@ -31,9 +31,9 @@ of them.
Patterns are compiled by PCRE2 into a reasonably efficient interpretive code,
so that most simple patterns do not use much memory for storing the compiled
version. However, there is one case where the memory usage of a compiled
-pattern can be unexpectedly large. If a parenthesized group has a quantifier
-with a minimum greater than 1 and/or a limited maximum, the whole group is
-repeated in the compiled code. For example, the pattern
+pattern can be unexpectedly large. If a parenthesized subpattern has a
+quantifier with a minimum greater than 1 and/or a limited maximum, the whole
+subpattern is repeated in the compiled code. For example, the pattern
<pre>
(abc|def){2,4}
</pre>
@@ -252,9 +252,9 @@ Cambridge, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 03 February 2019
+Last updated: 25 April 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2posix.html b/dist2/doc/html/pcre2posix.html
index 20a2009b..1da24601 100644
--- a/dist2/doc/html/pcre2posix.html
+++ b/dist2/doc/html/pcre2posix.html
@@ -15,75 +15,51 @@ please consult the man page, in case the conversion went wrong.
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
-<li><a name="TOC3" href="#SEC3">USING THE POSIX FUNCTIONS</a>
-<li><a name="TOC4" href="#SEC4">COMPILING A PATTERN</a>
-<li><a name="TOC5" href="#SEC5">MATCHING NEWLINE CHARACTERS</a>
-<li><a name="TOC6" href="#SEC6">MATCHING A PATTERN</a>
-<li><a name="TOC7" href="#SEC7">ERROR MESSAGES</a>
-<li><a name="TOC8" href="#SEC8">MEMORY USAGE</a>
-<li><a name="TOC9" href="#SEC9">AUTHOR</a>
-<li><a name="TOC10" href="#SEC10">REVISION</a>
+<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
+<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
+<li><a name="TOC5" href="#SEC5">MATCHING A PATTERN</a>
+<li><a name="TOC6" href="#SEC6">ERROR MESSAGES</a>
+<li><a name="TOC7" href="#SEC7">MEMORY USAGE</a>
+<li><a name="TOC8" href="#SEC8">AUTHOR</a>
+<li><a name="TOC9" href="#SEC9">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>#include &#60;pcre2posix.h&#62;</b>
</P>
<P>
-<b>int pcre2_regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
+<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
<b> int <i>cflags</i>);</b>
<br>
<br>
-<b>int pcre2_regexec(const regex_t *<i>preg</i>, const char *<i>string</i>,</b>
+<b>int regexec(const regex_t *<i>preg</i>, const char *<i>string</i>,</b>
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
<br>
<br>
-<b>size_t pcre2_regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
+<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
<br>
<br>
-<b>void pcre2_regfree(regex_t *<i>preg</i>);</b>
+<b>void regfree(regex_t *<i>preg</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
This set of functions provides a POSIX-style API for the PCRE2 regular
-expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit
-and 32-bit libraries. See the
+expression 8-bit library. See the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation for a description of PCRE2's native API, which contains much
-additional functionality.
+additional functionality. There are no POSIX-style wrappers for PCRE2's 16-bit
+and 32-bit libraries.
</P>
<P>
-The functions described here are wrapper functions that ultimately call the
-PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b> header
-file, and they all have unique names starting with <b>pcre2_</b>. However, the
-<b>pcre2posix.h</b> header also contains macro definitions that convert the
-standard POSIX names such as <b>regcomp()</b> into <b>pcre2_regcomp()</b> etc. This
-means that a program can use the usual POSIX names without running the risk of
-accidentally linking with POSIX functions from a different library.
+The functions described here are just wrapper functions that ultimately call
+the PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b>
+header file, and on Unix systems the library itself is called
+<b>libpcre2-posix.a</b>, so can be accessed by adding <b>-lpcre2-posix</b> to the
+command for linking an application that uses them. Because the POSIX functions
+call the native ones, it is also necessary to add <b>-lpcre2-8</b>.
</P>
<P>
-On Unix-like systems the PCRE2 POSIX library is called <b>libpcre2-posix</b>, so
-can be accessed by adding <b>-lpcre2-posix</b> to the command for linking an
-application. Because the POSIX functions call the native ones, it is also
-necessary to add <b>-lpcre2-8</b>.
-</P>
-<P>
-Although they are not defined as prototypes in <b>pcre2posix.h</b>, the library
-does contain functions with the POSIX names <b>regcomp()</b> etc. These simply
-pass their arguments to the PCRE2 functions. These functions are provided for
-backwards compatibility with earlier versions of PCRE2, so that existing
-programs do not have to be recompiled.
-</P>
-<P>
-Calling the header file <b>pcre2posix.h</b> avoids any conflict with other POSIX
-libraries. It can, of course, be renamed or aliased as <b>regex.h</b>, which is
-the "correct" name, if there is no clash. It provides two structure types,
-<i>regex_t</i> for compiled internal forms, and <i>regmatch_t</i> for returning
-captured substrings. It also defines some constants whose names start with
-"REG_"; these are used for setting options and identifying error codes.
-</P>
-<br><a name="SEC3" href="#TOC1">USING THE POSIX FUNCTIONS</a><br>
-<P>
Those POSIX option bits that can reasonably be mapped to PCRE2 native options
have been implemented. In addition, the option REG_EXTENDED is defined with the
value zero. This has no effect, but since programs that are written to the
@@ -104,13 +80,17 @@ POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding
domains it is probably even less compatible.
</P>
<P>
-The descriptions below use the actual names of the functions, but, as described
-above, the standard POSIX names (without the <b>pcre2_</b> prefix) may also be
-used.
+The header for these functions is supplied as <b>pcre2posix.h</b> to avoid any
+potential clash with other POSIX libraries. It can, of course, be renamed or
+aliased as <b>regex.h</b>, which is the "correct" name. It provides two
+structure types, <i>regex_t</i> for compiled internal forms, and
+<i>regmatch_t</i> for returning captured substrings. It also defines some
+constants whose names start with "REG_"; these are used for setting options and
+identifying error codes.
</P>
-<br><a name="SEC4" href="#TOC1">COMPILING A PATTERN</a><br>
+<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
<P>
-The function <b>pcre2_regcomp()</b> is called to compile a pattern into an
+The function <b>regcomp()</b> is called to compile a pattern into an
internal form. By default, the pattern is a C string terminated by a binary
zero (but see REG_PEND below). The <i>preg</i> argument is a pointer to a
<b>regex_t</b> structure that is used as a base for storing information about
@@ -148,18 +128,18 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
<pre>
REG_NOSUB
</pre>
-When a pattern that is compiled with this flag is passed to
-<b>pcre2_regexec()</b> for matching, the <i>nmatch</i> and <i>pmatch</i> arguments
-are ignored, and no captured strings are returned. Versions of the PCRE library
-prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this
-no longer happens because it disables the use of backreferences.
+When a pattern that is compiled with this flag is passed to <b>regexec()</b> for
+matching, the <i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no
+captured strings are returned. Versions of the PCRE library prior to 10.22 used
+to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens
+because it disables the use of backreferences.
<pre>
REG_PEND
</pre>
If this option is set, the <b>re_endp</b> field in the <i>preg</i> structure
(which has the type const char *) must be set to point to the character beyond
-the end of the pattern before calling <b>pcre2_regcomp()</b>. The pattern itself
-may now contain binary zeros, which are treated as data characters. Without
+the end of the pattern before calling <b>regcomp()</b>. The pattern itself may
+now contain binary zeros, which are treated as data characters. Without
REG_PEND, a binary zero terminates the pattern and the <b>re_endp</b> field is
ignored. This is a GNU extension to the POSIX standard and should be used with
caution in software intended to be portable to other systems.
@@ -194,19 +174,18 @@ newlines are matched by the dot metacharacter (they are not) or by a negative
class such as [^a] (they are).
</P>
<P>
-The yield of <b>pcre2_regcomp()</b> is zero on success, and non-zero otherwise.
-The <i>preg</i> structure is filled in on success, and one other member of the
+The yield of <b>regcomp()</b> is zero on success, and non-zero otherwise. The
+<i>preg</i> structure is filled in on success, and one other member of the
structure (as well as <i>re_endp</i>) is public: <i>re_nsub</i> contains the
number of capturing subpatterns in the regular expression. Various error codes
are defined in the header file.
</P>
<P>
-NOTE: If the yield of <b>pcre2_regcomp()</b> is non-zero, you must not attempt
-to use the contents of the <i>preg</i> structure. If, for example, you pass it
-to <b>pcre2_regexec()</b>, the result is undefined and your program is likely to
-crash.
+NOTE: If the yield of <b>regcomp()</b> is non-zero, you must not attempt to
+use the contents of the <i>preg</i> structure. If, for example, you pass it to
+<b>regexec()</b>, the result is undefined and your program is likely to crash.
</P>
-<br><a name="SEC5" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
+<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
<P>
This area is not simple, because POSIX and Perl take different views of things.
It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was
@@ -240,16 +219,16 @@ is no way to stop newline from matching [^a].
Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and
PCRE2_DOLLAR_ENDONLY when calling <b>pcre2_compile()</b> directly, but there is
no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using
-the POSIX API, passing REG_NEWLINE to PCRE2's <b>pcre2_regcomp()</b> function
+the POSIX API, passing REG_NEWLINE to PCRE2's <b>regcomp()</b> function
causes PCRE2_MULTILINE to be passed to <b>pcre2_compile()</b>, and REG_DOTALL
passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY.
</P>
-<br><a name="SEC6" href="#TOC1">MATCHING A PATTERN</a><br>
+<br><a name="SEC5" href="#TOC1">MATCHING A PATTERN</a><br>
<P>
-The function <b>pcre2_regexec()</b> is called to match a compiled pattern
-<i>preg</i> against a given <i>string</i>, which is by default terminated by a
-zero byte (but see REG_STARTEND below), subject to the options in <i>eflags</i>.
-These can be:
+The function <b>regexec()</b> is called to match a compiled pattern <i>preg</i>
+against a given <i>string</i>, which is by default terminated by a zero byte
+(but see REG_STARTEND below), subject to the options in <i>eflags</i>. These can
+be:
<pre>
REG_NOTBOL
</pre>
@@ -293,7 +272,7 @@ are mutually exclusive; the error REG_INVARG is returned.
<P>
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
-<b>pcre2_regexec()</b> are ignored (except possibly as input for REG_STARTEND).
+<b>regexec()</b> are ignored (except possibly as input for REG_STARTEND).
</P>
<P>
The value of <i>nmatch</i> may be zero, and the value of <i>pmatch</i> may be NULL
@@ -315,25 +294,24 @@ array have both structure members set to -1.
A successful match yields a zero return; various error codes are defined in the
header file, of which REG_NOMATCH is the "expected" failure code.
</P>
-<br><a name="SEC7" href="#TOC1">ERROR MESSAGES</a><br>
+<br><a name="SEC6" href="#TOC1">ERROR MESSAGES</a><br>
<P>
-The <b>pcre2_regerror()</b> function maps a non-zero errorcode from either
-<b>pcre2_regcomp()</b> or <b>pcre2_regexec()</b> to a printable message. If
-<i>preg</i> is not NULL, the error should have arisen from the use of that
-structure. A message terminated by a binary zero is placed in <i>errbuf</i>. If
-the buffer is too short, only the first <i>errbuf_size</i> - 1 characters of the
-error message are used. The yield of the function is the size of buffer needed
-to hold the whole message, including the terminating zero. This value is
-greater than <i>errbuf_size</i> if the message was truncated.
+The <b>regerror()</b> function maps a non-zero errorcode from either
+<b>regcomp()</b> or <b>regexec()</b> to a printable message. If <i>preg</i> is not
+NULL, the error should have arisen from the use of that structure. A message
+terminated by a binary zero is placed in <i>errbuf</i>. If the buffer is too
+short, only the first <i>errbuf_size</i> - 1 characters of the error message are
+used. The yield of the function is the size of buffer needed to hold the whole
+message, including the terminating zero. This value is greater than
+<i>errbuf_size</i> if the message was truncated.
</P>
-<br><a name="SEC8" href="#TOC1">MEMORY USAGE</a><br>
+<br><a name="SEC7" href="#TOC1">MEMORY USAGE</a><br>
<P>
Compiling a regular expression causes memory to be allocated and associated
-with the <i>preg</i> structure. The function <b>pcre2_regfree()</b> frees all
-such memory, after which <i>preg</i> may no longer be used as a compiled
-expression.
+with the <i>preg</i> structure. The function <b>regfree()</b> frees all such
+memory, after which <i>preg</i> may no longer be used as a compiled expression.
</P>
-<br><a name="SEC9" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC8" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -342,11 +320,11 @@ University Computing Service
Cambridge, England.
<br>
</P>
-<br><a name="SEC10" href="#TOC1">REVISION</a><br>
+<br><a name="SEC9" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 30 January 2019
+Last updated: 15 June 2017
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2017 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2syntax.html b/dist2/doc/html/pcre2syntax.html
index e3dc1861..7d332a10 100644
--- a/dist2/doc/html/pcre2syntax.html
+++ b/dist2/doc/html/pcre2syntax.html
@@ -32,15 +32,14 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
-<li><a name="TOC20" href="#SEC20">SCRIPT RUNS</a>
-<li><a name="TOC21" href="#SEC21">BACKREFERENCES</a>
-<li><a name="TOC22" href="#SEC22">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
-<li><a name="TOC23" href="#SEC23">CONDITIONAL PATTERNS</a>
-<li><a name="TOC24" href="#SEC24">BACKTRACKING CONTROL</a>
-<li><a name="TOC25" href="#SEC25">CALLOUTS</a>
-<li><a name="TOC26" href="#SEC26">SEE ALSO</a>
-<li><a name="TOC27" href="#SEC27">AUTHOR</a>
-<li><a name="TOC28" href="#SEC28">REVISION</a>
+<li><a name="TOC20" href="#SEC20">BACKREFERENCES</a>
+<li><a name="TOC21" href="#SEC21">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
+<li><a name="TOC22" href="#SEC22">CONDITIONAL PATTERNS</a>
+<li><a name="TOC23" href="#SEC23">BACKTRACKING CONTROL</a>
+<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
+<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
+<li><a name="TOC26" href="#SEC26">AUTHOR</a>
+<li><a name="TOC27" href="#SEC27">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
<P>
@@ -58,8 +57,7 @@ documentation. This document contains a quick-reference summary of the syntax.
</P>
<br><a name="SEC3" href="#TOC1">ESCAPED CHARACTERS</a><br>
<P>
-This table applies to ASCII and Unicode environments. An unrecognized escape
-sequence causes an error.
+This table applies to ASCII and Unicode environments.
<pre>
\a alarm, that is, the BEL character (hex 07)
\cx "control-x", where x is any ASCII printing character
@@ -71,25 +69,12 @@ sequence causes an error.
\0dd character with octal code 0dd
\ddd character with octal code ddd, or backreference
\o{ddd..} character with octal code ddd..
+ \U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
\N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
+ \uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
\xhh character with hex code hh
\x{hh..} character with hex code hh..
</pre>
-If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the
-following are also recognized:
-<pre>
- \U the character "U"
- \uhhhh character with hex code hhhh
- \u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX
-</pre>
-When \x is not followed by {, from zero to two hexadecimal digits are read,
-but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
-recognized as a hexadecimal escape; otherwise it matches a literal "x".
-Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
-or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
-matches a literal "u".
-</P>
-<P>
Note that \0dd is always an octal code. The treatment of backslash followed by
a non-zero digit is complicated; for details see the section
<a href="pcre2pattern.html#digitsafterbackslash">"Non-printing characters"</a>
@@ -100,6 +85,13 @@ also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not
supported in EBCDIC environments. Note that \N not followed by an opening
curly bracket has a different meaning (see below).
</P>
+<P>
+When \x is not followed by {, from zero to two hexadecimal digits are read,
+but if PCRE2_ALT_BSUX is set, \x must be followed by two hexadecimal digits to
+be recognized as a hexadecimal escape; otherwise it matches a literal "x".
+Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits,
+it matches a literal "u".
+</P>
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
<P>
<pre>
@@ -431,23 +423,19 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
<br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
<P>
<pre>
- (...) capture group
- (?&#60;name&#62;...) named capture group (Perl)
- (?'name'...) named capture group (Perl)
- (?P&#60;name&#62;...) named capture group (Python)
- (?:...) non-capture group
- (?|...) non-capture group; reset group numbers for
- capture groups in each alternative
-</pre>
-In non-UTF modes, names may contain underscores and ASCII letters and digits;
-in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
-both cases, a name must not start with a digit.
+ (...) capturing group
+ (?&#60;name&#62;...) named capturing group (Perl)
+ (?'name'...) named capturing group (Perl)
+ (?P&#60;name&#62;...) named capturing group (Python)
+ (?:...) non-capturing group
+ (?|...) non-capturing group; reset group numbers for
+ capturing groups in each alternative
+</PRE>
</P>
<br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
<P>
<pre>
- (?&#62;...) atomic non-capture group
- (*atomic:...) atomic non-capture group
+ (?&#62;...) atomic, non-capturing group
</PRE>
</P>
<br><a name="SEC15" href="#TOC1">COMMENT</a><br>
@@ -475,7 +463,7 @@ of the group.
Unsetting x or xx unsets both. Several options may be set at once, and a
mixture of setting and unsetting such as (?i-x) is allowed, but there may be
only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
-(?^in). An option setting may appear at the start of a non-capture group, for
+(?^in). An option setting may appear at the start of a non-capturing group, for
example (?i:...).
</P>
<P>
@@ -526,35 +514,14 @@ setting with a similar syntax.
<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<P>
<pre>
- (?=...) )
- (*pla:...) ) positive lookahead
- (*positive_lookahead:...) )
-
- (?!...) )
- (*nla:...) ) negative lookahead
- (*negative_lookahead:...) )
-
- (?&#60;=...) )
- (*plb:...) ) positive lookbehind
- (*positive_lookbehind:...) )
-
- (?&#60;!...) )
- (*nlb:...) ) negative lookbehind
- (*negative_lookbehind:...) )
+ (?=...) positive look ahead
+ (?!...) negative look ahead
+ (?&#60;=...) positive look behind
+ (?&#60;!...) negative look behind
</pre>
-Each top-level branch of a lookbehind must be of a fixed length.
-</P>
-<br><a name="SEC20" href="#TOC1">SCRIPT RUNS</a><br>
-<P>
-<pre>
- (*script_run:...) ) script run, can be backtracked into
- (*sr:...) )
-
- (*atomic_script_run:...) ) atomic script run
- (*asr:...) )
-</PRE>
+Each top-level branch of a look behind must be of a fixed length.
</P>
-<br><a name="SEC21" href="#TOC1">BACKREFERENCES</a><br>
+<br><a name="SEC20" href="#TOC1">BACKREFERENCES</a><br>
<P>
<pre>
\n reference by number (can be ambiguous)
@@ -571,26 +538,26 @@ Each top-level branch of a lookbehind must be of a fixed length.
(?P=name) reference by name (Python)
</PRE>
</P>
-<br><a name="SEC22" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
+<br><a name="SEC21" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<P>
<pre>
(?R) recurse whole pattern
- (?n) call subroutine by absolute number
- (?+n) call subroutine by relative number
- (?-n) call subroutine by relative number
- (?&name) call subroutine by name (Perl)
- (?P&#62;name) call subroutine by name (Python)
- \g&#60;name&#62; call subroutine by name (Oniguruma)
- \g'name' call subroutine by name (Oniguruma)
- \g&#60;n&#62; call subroutine by absolute number (Oniguruma)
- \g'n' call subroutine by absolute number (Oniguruma)
- \g&#60;+n&#62; call subroutine by relative number (PCRE2 extension)
- \g'+n' call subroutine by relative number (PCRE2 extension)
- \g&#60;-n&#62; call subroutine by relative number (PCRE2 extension)
- \g'-n' call subroutine by relative number (PCRE2 extension)
+ (?n) call subpattern by absolute number
+ (?+n) call subpattern by relative number
+ (?-n) call subpattern by relative number
+ (?&name) call subpattern by name (Perl)
+ (?P&#62;name) call subpattern by name (Python)
+ \g&#60;name&#62; call subpattern by name (Oniguruma)
+ \g'name' call subpattern by name (Oniguruma)
+ \g&#60;n&#62; call subpattern by absolute number (Oniguruma)
+ \g'n' call subpattern by absolute number (Oniguruma)
+ \g&#60;+n&#62; call subpattern by relative number (PCRE2 extension)
+ \g'+n' call subpattern by relative number (PCRE2 extension)
+ \g&#60;-n&#62; call subpattern by relative number (PCRE2 extension)
+ \g'-n' call subpattern by relative number (PCRE2 extension)
</PRE>
</P>
-<br><a name="SEC23" href="#TOC1">CONDITIONAL PATTERNS</a><br>
+<br><a name="SEC22" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<P>
<pre>
(?(condition)yes-pattern)
@@ -605,7 +572,7 @@ Each top-level branch of a lookbehind must be of a fixed length.
(?(R) overall recursion condition
(?(Rn) specific numbered group recursion condition
(?(R&name) specific named group recursion condition
- (?(DEFINE) define groups for reference
+ (?(DEFINE) define subpattern for reference
(?(VERSION[&#62;]=n.m) test PCRE2 version
(?(assert) assertion condition
</pre>
@@ -613,7 +580,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
conditions or recursion tests. Such a condition is interpreted as a reference
condition if the relevant named group exists.
</P>
-<br><a name="SEC24" href="#TOC1">BACKTRACKING CONTROL</a><br>
+<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
@@ -640,7 +607,7 @@ pattern is not anchored.
The effect of one of these verbs in a group called as a subroutine is confined
to the subroutine call.
</P>
-<br><a name="SEC25" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
<P>
<pre>
(?C) callout (assumed number 0)
@@ -651,12 +618,12 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
start and the end), and the starting delimiter { matched with the ending
delimiter }. To encode the ending delimiter within the string, double it.
</P>
-<br><a name="SEC26" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
<b>pcre2matching</b>(3), <b>pcre2</b>(3).
</P>
-<br><a name="SEC27" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC26" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -665,11 +632,11 @@ University Computing Service
Cambridge, England.
<br>
</P>
-<br><a name="SEC28" href="#TOC1">REVISION</a><br>
+<br><a name="SEC27" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 11 February 2019
+Last updated: 02 September 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2test.html b/dist2/doc/html/pcre2test.html
index 083d5cc3..af2b18cb 100644
--- a/dist2/doc/html/pcre2test.html
+++ b/dist2/doc/html/pcre2test.html
@@ -606,10 +606,8 @@ for a description of the effects of these options.
/s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
- escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
/x extended set PCRE2_EXTENDED
/xx extended_more set PCRE2_EXTENDED_MORE
- extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
firstline set PCRE2_FIRSTLINE
literal set PCRE2_LITERAL
match_line set PCRE2_EXTRA_MATCH_LINE
@@ -717,14 +715,14 @@ information is obtained from the <b>pcre2_pattern_info()</b> function. Here are
some typical examples:
<pre>
re&#62; /(?i)(^a|^b)/m,info
- Capture group count = 1
+ Capturing subpattern count = 1
Compile options: multiline
Overall options: caseless multiline
First code unit at start or follows newline
Subject length lower bound = 1
re&#62; /(?i)abc/info
- Capture group count = 0
+ Capturing subpattern count = 0
Compile options: &#60;none&#62;
Overall options: caseless
First code unit = 'a' (caseless)
@@ -1043,7 +1041,6 @@ process.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
- allvector show the entire ovector
allusedtext show all consulted text
altglobal alternative global matching
/g global global matching
@@ -1051,11 +1048,8 @@ process.
mark show mark values
replace=&#60;string&#62; specify a replacement string
startchar show starting character when relevant
- substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=&#60;n&#62; skip substitution number n
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=&#60;n&#62; skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
</pre>
@@ -1191,7 +1185,6 @@ pattern.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
- allvector show the entire ovector
allusedtext show all consulted text (non-JIT only)
altglobal alternative global matching
callout_capture show captures at callout time
@@ -1221,11 +1214,8 @@ pattern.
replace=&#60;string&#62; specify a replacement string
startchar show startchar when relevant
startoffset=&#60;n&#62; same as offset=&#60;n&#62;
- substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=&#60;n&#62; skip substitution number n
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=&#60;n&#62; skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
zero_terminate pass the subject as zero-terminated
@@ -1291,28 +1281,10 @@ captured parentheses be output after a match. By default, only those up to the
highest one actually used in the match are output (corresponding to the return
code from <b>pcre2_match()</b>). Groups that did not take part in the match
are output as "&#60;unset&#62;". This modifier is not relevant for DFA matching (which
-does no capturing) and does not apply when <b>replace</b> is specified; it is
-ignored, with a warning message, if present.
+does no capturing); it is ignored, with a warning message, if present.
</P>
<br><b>
-Showing the entire ovector, for all outcomes
-</b><br>
-<P>
-The <b>allvector</b> modifier requests that the entire ovector be shown,
-whatever the outcome of the match. Compare <b>allcaptures</b>, which shows only
-up to the maximum number of capture groups for the pattern, and then only for a
-successful complete non-DFA match. This modifier, which acts after any match
-result, and also for DFA matching, provides a means of checking that there are
-no unexpected modifications to ovector fields. Before each match attempt, the
-ovector is filled with a special value, and if this is found in both elements
-of a capturing pair, "&#60;unchanged&#62;" is output. After a successful match, this
-applies to all groups after the maximum capture group for the pattern. In other
-cases it applies to the entire ovector. After a partial match, the first two
-elements are the only ones that should be set. After a DFA match, the amount of
-ovector that is used depends on the number of matches that were found.
-</P>
-<br><b>
-Testing pattern callouts
+Testing callouts
</b><br>
<P>
A callout function is supplied when <b>pcre2test</b> calls the library matching
@@ -1320,9 +1292,6 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
controlled by various modifiers listed above whose names begin with
<b>callout_</b>. Details are given in the section entitled "Callouts"
<a href="#callouts">below.</a>
-Testing callouts from <b>pcre2_substitute()</b> is described separately in
-"Testing the substitution function"
-<a href="#substitution">below.</a>
</P>
<br><b>
Finding all matches in a string
@@ -1354,8 +1323,8 @@ Testing substring extraction functions
<P>
The <b>copy</b> and <b>get</b> modifiers can be used to test the
<b>pcre2_substring_copy_xxx()</b> and <b>pcre2_substring_get_xxx()</b> functions.
-They can be given more than once, and each can specify a capture group name or
-number, for example:
+They can be given more than once, and each can specify a group name or number,
+for example:
<pre>
abcd\=copy=1,copy=3,get=G1
</pre>
@@ -1374,7 +1343,7 @@ instead of a colon. This is in addition to the normal full list. The string
length (that is, the return from the extraction function) is given in
parentheses after each substring, followed by the name when the extraction was
by name.
-<a name="substitution"></a></P>
+</P>
<br><b>
Testing the substitution function
</b><br>
@@ -1432,10 +1401,10 @@ The default action of <b>pcre2_substitute()</b> is to return
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
<b>substitute_overflow_length</b> modifier), <b>pcre2_substitute()</b> continues
-to go through the motions of matching and substituting (but not doing any
-callouts), in order to compute the size of buffer that is required. When this
-happens, <b>pcre2test</b> shows the required buffer length (which includes space
-for the trailing zero) as part of the error message. For example:
+to go through the motions of matching and substituting, in order to compute the
+size of buffer that is required. When this happens, <b>pcre2test</b> shows the
+required buffer length (which includes space for the trailing zero) as part of
+the error message. For example:
<pre>
/abc/substitute_overflow_length
123abc123\=replace=[9]XYZ
@@ -1446,49 +1415,6 @@ matching provokes an error return ("bad option value") from
<b>pcre2_substitute()</b>.
</P>
<br><b>
-Testing substitute callouts
-</b><br>
-<P>
-If the <b>substitute_callout</b> modifier is set, a substitution callout
-function is set up. The <b>null_context</b> modifier must not be set, because
-the address of the callout function is passed in a match context. When the
-callout function is called (after each substitution), details of the input
-and output strings are output. For example:
-<pre>
- /abc/g,replace=&#60;$0&#62;,substitute_callout
- abcdefabcpqr
- 1(1) Old 0 3 "abc" New 0 5 "&#60;abc&#62;"
- 2(1) Old 6 9 "abc" New 8 13 "&#60;abc&#62;"
- 2: &#60;abc&#62;def&#60;abc&#62;pqr
-</pre>
-The first number on each callout line is the count of matches. The
-parenthesized number is the number of pairs that are set in the ovector (that
-is, one more than the number of capturing groups that were set). Then are
-listed the offsets of the old substring, its contents, and the same for the
-replacement.
-</P>
-<P>
-By default, the substitution callout function returns zero, which accepts the
-replacement and causes matching to continue if /g was used. Two further
-modifiers can be used to test other return values. If <b>substitute_skip</b> is
-set to a value greater than zero the callout function returns +1 for the match
-of that number, and similarly <b>substitute_stop</b> returns -1. These cause the
-replacement to be rejected, and -1 causes no further matching to take place. If
-either of them are set, <b>substitute_callout</b> is assumed. For example:
-<pre>
- /abc/g,replace=&#60;$0&#62;,substitute_skip=1
- abcdefabcpqr
- 1(1) Old 0 3 "abc" New 0 5 "&#60;abc&#62; SKIPPED"
- 2(1) Old 6 9 "abc" New 6 11 "&#60;abc&#62;"
- 2: abcdef&#60;abc&#62;pqr
- abcdefabcpqr\=substitute_stop=1
- 1(1) Old 0 3 "abc" New 0 5 "&#60;abc&#62; STOPPED"
- 1: abcdefabcpqr
-</pre>
-If both are set for the same number, stop takes precedence. Only a single skip
-or stop is supported, which is sufficient for testing that the feature works.
-</P>
-<br><b>
Setting the JIT stack size
</b><br>
<P>
@@ -1628,11 +1554,11 @@ Passing a NULL context
</b><br>
<P>
Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
-<b>pcre2_dfa_match()</b>, <b>pcre2_jit_match()</b> or <b>pcre2_substitute()</b>.
-If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
-testing that the matching and substitution functions behave correctly in this
-case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>pcre2_dfa_match()</b> or <b>pcre2_jit_match()</b>. If the <b>null_context</b>
+modifier is set, however, NULL is passed. This is for testing that the matching
+functions behave correctly in this case (they use default values). This
+modifier cannot be used with the <b>find_limits</b> modifier or when testing the
+substitution function.
</P>
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
<P>
@@ -2078,9 +2004,9 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 11 March 2019
+Last updated: 21 July 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/html/pcre2unicode.html b/dist2/doc/html/pcre2unicode.html
index c11c7c2b..24f6d936 100644
--- a/dist2/doc/html/pcre2unicode.html
+++ b/dist2/doc/html/pcre2unicode.html
@@ -38,11 +38,10 @@ UNICODE PROPERTY SUPPORT
</b><br>
<P>
When PCRE2 is built with Unicode support, the escape sequences \p{..},
-\P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+\P{..}, and \X can be used. The Unicode properties that can be tested are
+limited to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
+the derived properties Any and L&. Full lists are given in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
and
<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
@@ -74,17 +73,11 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit.
</P>
<P>
-In UTF modes, capture group names are not restricted to ASCII, and may contain
-any Unicode letters and decimal digits, as well as underscore.
-</P>
-<P>
The escape sequence \C can be used to match a single code unit in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \C in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
-documentation). For this reason, there is a build-time option that disables
-support for \C completely. There is also a less draconian compile-time option
-for locking out the use of \C when a pattern is compiled.
+documentation).
</P>
<P>
The use of \C is not supported by the alternative matching function
@@ -131,150 +124,17 @@ for characters whose code points are less than 128 and that have at most two
case-equivalent values. For these, a direct table lookup is used for speed. A
few Unicode characters such as Greek sigma have more than two code points that
are case-equivalent, and these are treated as such.
-<a name="scriptruns"></a></P>
-<br><b>
-SCRIPT RUNS
-</b><br>
-<P>
-The pattern constructs (*script_run:...) and (*atomic_script_run:...), with
-synonyms (*sr:...) and (*asr:...), verify that the string matched within the
-parentheses is a script run. In concept, a script run is a sequence of
-characters that are all from the same Unicode script. However, because some
-scripts are commonly used together, and because some diacritical and other
-marks are used with multiple scripts, it is not that simple.
-</P>
-<P>
-Every Unicode character has a Script property, mostly with a value
-corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
-are also three special values:
-</P>
-<P>
-"Unknown" is used for code points that have not been assigned, and also for the
-surrogate code points. In the PCRE2 32-bit library, characters whose code
-points are greater than the Unicode maximum (U+10FFFF), which are accessible
-only in non-UTF mode, are assigned the Unknown script.
-</P>
-<P>
-"Common" is used for characters that are used with many scripts. These include
-punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII
-digits 0 to 9.
-</P>
-<P>
-"Inherited" is used for characters such as diacritical marks that modify a
-previous character. These are considered to take on the script of the character
-that they modify.
-</P>
-<P>
-Some Inherited characters are used with many scripts, but many of them are only
-normally used with a small number of scripts. For example, U+102E0 (Coptic
-Epact thousands mark) is used only with Arabic and Coptic. In order to make it
-possible to check this, a Unicode property called Script Extension exists. Its
-value is a list of scripts that apply to the character. For the majority of
-characters, the list contains just one script, the same one as the Script
-property. However, for characters such as U+102E0 more than one Script is
-listed. There are also some Common characters that have a single, non-Common
-script in their Script Extension list.
-</P>
-<P>
-The next section describes the basic rules for deciding whether a given string
-of characters is a script run. Note, however, that there are some special cases
-involving the Chinese Han script, and an additional constraint for decimal
-digits. These are covered in subsequent sections.
-</P>
-<br><b>
-Basic script run rules
-</b><br>
-<P>
-A string that is less than two characters long is a script run. This is the
-only case in which an Unknown character can be part of a script run. Longer
-strings are checked using only the Script Extensions property, not the basic
-Script property.
-</P>
-<P>
-If a character's Script Extension property is the single value "Inherited", it
-is always accepted as part of a script run. This is also true for the property
-"Common", subject to the checking of decimal digits described below. All the
-remaining characters in a script run must have at least one script in common in
-their Script Extension lists. In set-theoretic terminology, the intersection of
-all the sets of scripts must not be empty.
-</P>
-<P>
-A simple example is an Internet name such as "google.com". The letters are all
-in the Latin script, and the dot is Common, so this string is a script run.
-However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
-string that looks the same, but with Cyrillic "o"s is not a script run.
-</P>
-<P>
-More interesting examples involve characters with more than one script in their
-Script Extension. Consider the following characters:
-<pre>
- U+060C Arabic comma
- U+06D4 Arabic full stop
-</pre>
-The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
-Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
-appear in script runs of either Arabic or Hanifi Rohingya. The first could also
-appear in Syriac or Thaana script runs, but the second could not.
-</P>
-<br><b>
-The Chinese Han script
-</b><br>
-<P>
-The Chinese Han script is commonly used in conjunction with other scripts for
-writing certain languages. Japanese uses the Hiragana and Katakana scripts
-together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
-and Han. These three combinations are treated as special cases when checking
-script runs and are, in effect, "virtual scripts". Thus, a script run may
-contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and
-Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of
-Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical
-Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/)
-in allowing such mixtures.
-</P>
-<br><b>
-Decimal digits
-</b><br>
-<P>
-Unicode contains many sets of 10 decimal digits in different scripts, and some
-scripts (including the Common script) contain more than one set. Some of these
-decimal digits are visually indistinguishable from the common ASCII
-digits. In addition to the script checking described above, if a script run
-contains any decimal digits, they must all come from the same set of 10
-adjacent characters.
</P>
<br><b>
VALIDITY OF UTF STRINGS
</b><br>
<P>
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
-are (by default) checked for validity on entry to the relevant functions. If an
-invalid UTF string is passed, a negative error code is returned. The code unit
-offset to the offending character can be extracted from the match data block by
-calling <b>pcre2_get_startchar()</b>, which is used for this purpose after a UTF
-error.
-</P>
-<P>
-In some situations, you may already know that your strings are valid, and
-therefore want to skip these checks in order to improve performance, for
-example in the case of a long subject string that is being scanned repeatedly.
-If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time,
-PCRE2 assumes that the pattern or subject it is given (respectively) contains
-only valid UTF code unit sequences.
-</P>
-<P>
-If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
-is usually undefined and your program may crash or loop indefinitely. There is,
-however, one mode of matching that can handle invalid UTF subject strings. This
-is matching via the JIT optimization using the PCRE2_JIT_INVALID_UTF option
-when calling <b>pcre2_jit_compile()</b>. For details, see the
-<a href="pcre2jit.html"><b>pcre2jit</b></a>
-documentation.
-</P>
-<P>
-Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the check for
-the pattern; it does not also apply to subject strings. If you want to disable
-the check for a subject string you must pass this same option to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>.
+are (by default) checked for validity on entry to the relevant functions.
+If an invalid UTF string is passed, a negative error code is returned. The
+code unit offset to the offending character can be extracted from the match
+data block by calling <b>pcre2_get_startchar()</b>, which is used for this
+purpose after a UTF error.
</P>
<P>
UTF-16 and UTF-32 strings can indicate their endianness by special code known
@@ -282,14 +142,13 @@ as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
strings to be in host byte order.
</P>
<P>
-Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other
-processing takes place. In the case of <b>pcre2_match()</b> and
-<b>pcre2_dfa_match()</b> calls with a non-zero starting offset, the check is
-applied only to that part of the subject that could be inspected during
-matching, and there is a check that the starting offset points to the first
-code unit of a character or to the end of the subject. If there are no
-lookbehind assertions in the pattern, the check starts at the starting offset.
-Otherwise, it starts at the length of the longest lookbehind before the
+A UTF string is checked before any other processing takes place. In the case of
+<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> calls with a non-zero starting
+offset, the check is applied only to that part of the subject that could be
+inspected during matching, and there is a check that the starting offset points
+to the first code unit of a character or to the end of the subject. If there
+are no lookbehind assertions in the pattern, the check starts at the starting
+offset. Otherwise, it starts at the length of the longest lookbehind before the
starting offset, or at the start of the subject if there are not that many
characters before the starting offset. Note that the sequences \b and \B are
one-character lookbehinds.
@@ -309,12 +168,31 @@ surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
UTF-32.)
</P>
<P>
-Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is
-given if an escape sequence for an invalid Unicode code point is encountered in
-the pattern. If you want to allow escape sequences such as \x{d800} (a
-surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
-option. However, this is possible only in UTF-8 and UTF-32 modes, because these
-values are not representable in UTF-16.
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance, for
+example in the case of a long subject string that is being scanned repeatedly.
+If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time,
+PCRE2 assumes that the pattern or subject it is given (respectively) contains
+only valid UTF code unit sequences.
+</P>
+<P>
+Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the check for
+the pattern; it does not also apply to subject strings. If you want to disable
+the check for a subject string you must pass this option to <b>pcre2_match()</b>
+or <b>pcre2_dfa_match()</b>.
+</P>
+<P>
+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
+is undefined and your program may crash or loop indefinitely.
+</P>
+<P>
+Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
+that is given if an escape sequence for an invalid Unicode code point is
+encountered in the pattern. If you want to allow escape sequences such as
+\x{d800} (a surrogate code point) you can set the
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
+only in UTF-8 and UTF-32 modes, because these values are not representable in
+UTF-16.
<a name="utf8strings"></a></P>
<br><b>
Errors in UTF-8 strings
@@ -422,9 +300,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
-Last updated: 06 March 2019
+Last updated: 02 September 2018
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/dist2/doc/pcre2.3 b/dist2/doc/pcre2.3
index 75450ecd..2f836585 100644
--- a/dist2/doc/pcre2.3
+++ b/dist2/doc/pcre2.3
@@ -1,4 +1,4 @@
-.TH PCRE2 3 "17 September 2018" "PCRE2 10.33"
+.TH PCRE2 3 "11 July 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH INTRODUCTION
@@ -156,7 +156,7 @@ listing), and the short pages for individual functions, are concatenated in
pcre2-config show PCRE2 installation configuration information
pcre2api details of PCRE2's native C API
pcre2build building PCRE2
- pcre2callout details of the pattern callout feature
+ pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
@@ -197,6 +197,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
.rs
.sp
.nf
-Last updated: 17 September 2018
+Last updated: 11 July 2018
Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2.txt b/dist2/doc/pcre2.txt
index 9c956e8c..30ba2f9e 100644
--- a/dist2/doc/pcre2.txt
+++ b/dist2/doc/pcre2.txt
@@ -141,7 +141,7 @@ USER DOCUMENTATION
pcre2-config show PCRE2 installation configuration information
pcre2api details of PCRE2's native C API
pcre2build building PCRE2
- pcre2callout details of the pattern callout feature
+ pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
@@ -177,7 +177,7 @@ AUTHOR
REVISION
- Last updated: 17 September 2018
+ Last updated: 11 July 2018
Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -293,10 +293,6 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
int (*callout_function)(pcre2_callout_block *, void *),
void *callout_data);
- int pcre2_set_substitute_callout(pcre2_match_context *mcontext,
- int (*callout_function)(pcre2_substitute_callout_block *, void *),
- void *callout_data);
-
int pcre2_set_offset_limit(pcre2_match_context *mcontext,
PCRE2_SIZE value);
@@ -402,8 +398,7 @@ PCRE2 NATIVE API AUXILIARY FUNCTIONS
const unsigned char *pcre2_maketables(pcre2_general_context *gcontext);
- int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
- void *where);
+ int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where);
int pcre2_callout_enumerate(const pcre2_code *code,
int (*callback)(pcre2_callout_enumerate_block *, void *),
@@ -882,11 +877,11 @@ PCRE2 CONTEXTS
int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
uint32_t value);
- This parameter adjusts the limit, set when PCRE2 is built (default
- 250), on the depth of parenthesis nesting in a pattern. This limit
- stops rogue patterns using up too much system stack when being com-
- piled. The limit applies to parentheses of all kinds, not just captur-
- ing parentheses.
+ This parameter adjusts the limit, set when PCRE2 is built (default 250),
+ on the depth of parenthesis nesting in a pattern. This limit stops
+ rogue patterns using up too much system stack when being compiled. The
+ limit applies to parentheses of all kinds, not just capturing parenthe-
+ ses.
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard_function)(uint32_t, void *), void *user_data);
@@ -938,18 +933,10 @@ PCRE2 CONTEXTS
int (*callout_function)(pcre2_callout_block *, void *),
void *callout_data);
- This sets up a callout function for PCRE2 to call at specified points
+ This sets up a "callout" function for PCRE2 to call at specified points
during a matching operation. Details are given in the pcre2callout doc-
umentation.
- int pcre2_set_substitute_callout(pcre2_match_context *mcontext,
- int (*callout_function)(pcre2_substitute_callout_block *, void *),
- void *callout_data);
-
- This sets up a callout function for PCRE2 to call after each substitu-
- tion made by pcre2_substitute(). Details are given in the section enti-
- tled "Creating a new string with substitutions" below.
-
int pcre2_set_offset_limit(pcre2_match_context *mcontext,
PCRE2_SIZE value);
@@ -961,7 +948,7 @@ PCRE2 CONTEXTS
more substitutions.
For example, if the pattern /abc/ is matched against "123abc" with an
- offset limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match
+ offset limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match
can never be found if the startoffset argument of pcre2_match(),
pcre2_dfa_match(), or pcre2_substitute() is greater than the offset
limit set in the match context.
@@ -1297,68 +1284,65 @@ COMPILING A PATTERN
Copies of both the code and the tables are made, with the new code
pointing to the new tables. The memory for the new tables is automati-
cally freed when pcre2_code_free() is called for the new copy of the
- compiled code. If pcre2_code_copy_with_tables() is called with a NULL
+ compiled code. If pcre2_code_copy_with_tables() is called with a NULL
argument, it returns NULL.
NOTE: When one of the matching functions is called, pointers to the
compiled pattern and the subject string are set in the match data block
- so that they can be referenced by the substring extraction functions
- after a successful match. After running a match, you must not free a
- compiled pattern or a subject string until after all operations on the
- match data block have taken place, unless, in the case of the subject
- string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is
- described in the section entitled "Option bits for pcre2_match()"
- below.
-
- The options argument for pcre2_compile() contains various bit settings
- that affect the compilation. It should be zero if none of them are
- required. The available options are described below. Some of them (in
- particular, those that are compatible with Perl, but some others as
- well) can also be set and unset from within the pattern (see the
+ so that they can be referenced by the substring extraction functions.
+ After running a match, you must not free a compiled pattern (or a sub-
+ ject string) until after all operations on the match data block have
+ taken place.
+
+ The options argument for pcre2_compile() contains various bit settings
+ that affect the compilation. It should be zero if no options are
+ required. The available options are described below. Some of them (in
+ particular, those that are compatible with Perl, but some others as
+ well) can also be set and unset from within the pattern (see the
detailed description in the pcre2pattern documentation).
- For those options that can be different in different parts of the pat-
- tern, the contents of the options argument specifies their settings at
- the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
- PCRE2_NO_UTF_CHECK options can be set at the time of matching as well
+ For those options that can be different in different parts of the pat-
+ tern, the contents of the options argument specifies their settings at
+ the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
+ PCRE2_NO_UTF_CHECK options can be set at the time of matching as well
as at compile time.
- Some additional options and less frequently required compile-time
- parameters (for example, the newline setting) can be provided in a com-
- pile context (as described above).
+ Other, less frequently required compile-time parameters (for example,
+ the newline setting) can be provided in a compile context (as described
+ above).
If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme-
- diately. Otherwise, the variables to which these point are set to an
- error code and an offset (number of code units) within the pattern,
- respectively, when pcre2_compile() returns NULL because a compilation
+ diately. Otherwise, the variables to which these point are set to an
+ error code and an offset (number of code units) within the pattern,
+ respectively, when pcre2_compile() returns NULL because a compilation
error has occurred. The values are not defined when compilation is suc-
cessful and pcre2_compile() returns a non-NULL value.
- There are nearly 100 positive error codes that pcre2_compile() may
- return if it finds an error in the pattern. There are also some nega-
- tive error codes that are used for invalid UTF strings. These are the
+ There are nearly 100 positive error codes that pcre2_compile() may
+ return if it finds an error in the pattern. There are also some nega-
+ tive error codes that are used for invalid UTF strings. These are the
same as given by pcre2_match() and pcre2_dfa_match(), and are described
- in the pcre2unicode page. There is no separate documentation for the
- positive error codes, because the textual error messages that are
- obtained by calling the pcre2_get_error_message() function (see
- "Obtaining a textual error message" below) should be self-explanatory.
- Macro names starting with PCRE2_ERROR_ are defined for both positive
+ in the pcre2unicode page. There is no separate documentation for the
+ positive error codes, because the textual error messages that are
+ obtained by calling the pcre2_get_error_message() function (see
+ "Obtaining a textual error message" below) should be self-explanatory.
+ Macro names starting with PCRE2_ERROR_ are defined for both positive
and negative error codes in pcre2.h.
The value returned in erroroffset is an indication of where in the pat-
- tern the error occurred. It is not necessarily the furthest point in
- the pattern that was read. For example, after the error "lookbehind
+ tern the error occurred. It is not necessarily the furthest point in
+ the pattern that was read. For example, after the error "lookbehind
assertion is not fixed length", the error offset points to the start of
- the failing assertion. For an invalid UTF-8 or UTF-16 string, the off-
+ the failing assertion. For an invalid UTF-8 or UTF-16 string, the off-
set is that of the first code unit of the failing character.
- Some errors are not detected until the whole pattern has been scanned;
- in these cases, the offset passed back is the length of the pattern.
- Note that the offset is in code units, not characters, even in a UTF
+ Some errors are not detected until the whole pattern has been scanned;
+ in these cases, the offset passed back is the length of the pattern.
+ Note that the offset is in code units, not characters, even in a UTF
mode. It may sometimes point into the middle of a UTF-8 or UTF-16 char-
acter.
- This code fragment shows a typical straightforward call to pcre2_com-
+ This code fragment shows a typical straightforward call to pcre2_com-
pile():
pcre2_code *re;
@@ -1372,485 +1356,469 @@ COMPILING A PATTERN
&erroffset, /* for error offset */
NULL); /* no compile context */
-
- Main compile options
-
- The following names for option bits are defined in the pcre2.h header
+ The following names for option bits are defined in the pcre2.h header
file:
PCRE2_ANCHORED
If this bit is set, the pattern is forced to be "anchored", that is, it
- is constrained to match only at the first matching point in the string
- that is being searched (the "subject string"). This effect can also be
- achieved by appropriate constructs in the pattern itself, which is the
+ is constrained to match only at the first matching point in the string
+ that is being searched (the "subject string"). This effect can also be
+ achieved by appropriate constructs in the pattern itself, which is the
only way to do it in Perl.
PCRE2_ALLOW_EMPTY_CLASS
- By default, for compatibility with Perl, a closing square bracket that
- immediately follows an opening one is treated as a data character for
- the class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the
+ By default, for compatibility with Perl, a closing square bracket that
+ immediately follows an opening one is treated as a data character for
+ the class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the
class, which therefore contains no characters and so can never match.
PCRE2_ALT_BSUX
- This option requests alternative handling of three escape sequences,
- which makes PCRE2's behaviour more like ECMAscript (aka JavaScript).
+ This option requests alternative handling of three escape sequences,
+ which makes PCRE2's behaviour more like ECMAscript (aka JavaScript).
When it is set:
(1) \U matches an upper case "U" character; by default \U causes a com-
pile time error (Perl uses \U to upper case subsequent characters).
(2) \u matches a lower case "u" character unless it is followed by four
- hexadecimal digits, in which case the hexadecimal number defines the
- code point to match. By default, \u causes a compile time error (Perl
+ hexadecimal digits, in which case the hexadecimal number defines the
+ code point to match. By default, \u causes a compile time error (Perl
uses it to upper case the following character).
- (3) \x matches a lower case "x" character unless it is followed by two
- hexadecimal digits, in which case the hexadecimal number defines the
- code point to match. By default, as in Perl, a hexadecimal number is
+ (3) \x matches a lower case "x" character unless it is followed by two
+ hexadecimal digits, in which case the hexadecimal number defines the
+ code point to match. By default, as in Perl, a hexadecimal number is
always expected after \x, but it may have zero, one, or two digits (so,
for example, \xz matches a binary zero character followed by z).
- ECMAscript 6 added additional functionality to \u. This can be accessed
- using the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile
- options" below). Note that this alternative escape handling applies
- only to patterns. Neither of these options affects the processing of
- replacement strings passed to pcre2_substitute().
-
PCRE2_ALT_CIRCUMFLEX
In multiline mode (when PCRE2_MULTILINE is set), the circumflex
- metacharacter matches at the start of the subject (unless PCRE2_NOTBOL
- is set), and also after any internal newline. However, it does not
+ metacharacter matches at the start of the subject (unless PCRE2_NOTBOL
+ is set), and also after any internal newline. However, it does not
match after a newline at the end of the subject, for compatibility with
- Perl. If you want a multiline circumflex also to match after a termi-
+ Perl. If you want a multiline circumflex also to match after a termi-
nating newline, you must set PCRE2_ALT_CIRCUMFLEX.
PCRE2_ALT_VERBNAMES
- By default, for compatibility with Perl, the name in any verb sequence
- such as (*MARK:NAME) is any sequence of characters that does not
- include a closing parenthesis. The name is not processed in any way,
- and it is not possible to include a closing parenthesis in the name.
- However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash
- processing is applied to verb names and only an unescaped closing
- parenthesis terminates the name. A closing parenthesis can be included
- in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED or
- PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped
- whitespace in verb names is skipped and #-comments are recognized,
+ By default, for compatibility with Perl, the name in any verb sequence
+ such as (*MARK:NAME) is any sequence of characters that does not
+ include a closing parenthesis. The name is not processed in any way,
+ and it is not possible to include a closing parenthesis in the name.
+ However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash
+ processing is applied to verb names and only an unescaped closing
+ parenthesis terminates the name. A closing parenthesis can be included
+ in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED or
+ PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped
+ whitespace in verb names is skipped and #-comments are recognized,
exactly as in the rest of the pattern.
PCRE2_AUTO_CALLOUT
- If this bit is set, pcre2_compile() automatically inserts callout
- items, all with number 255, before each pattern item, except immedi-
- ately before or after an explicit callout in the pattern. For discus-
+ If this bit is set, pcre2_compile() automatically inserts callout
+ items, all with number 255, before each pattern item, except immedi-
+ ately before or after an explicit callout in the pattern. For discus-
sion of the callout facility, see the pcre2callout documentation.
PCRE2_CASELESS
- If this bit is set, letters in the pattern match both upper and lower
- case letters in the subject. It is equivalent to Perl's /i option, and
- it can be changed within a pattern by a (?i) option setting. If
- PCRE2_UTF is set, Unicode properties are used for all characters with
- more than one other case, and for all characters whose code points are
- greater than U+007F. For lower valued characters with only one other
- case, a lookup table is used for speed. When PCRE2_UTF is not set, a
+ If this bit is set, letters in the pattern match both upper and lower
+ case letters in the subject. It is equivalent to Perl's /i option, and
+ it can be changed within a pattern by a (?i) option setting. If
+ PCRE2_UTF is set, Unicode properties are used for all characters with
+ more than one other case, and for all characters whose code points are
+ greater than U+007F. For lower valued characters with only one other
+ case, a lookup table is used for speed. When PCRE2_UTF is not set, a
lookup table is used for all code points less than 256, and higher code
- points (available only in 16-bit or 32-bit mode) are treated as not
+ points (available only in 16-bit or 32-bit mode) are treated as not
having another case.
PCRE2_DOLLAR_ENDONLY
- If this bit is set, a dollar metacharacter in the pattern matches only
- at the end of the subject string. Without this option, a dollar also
- matches immediately before a newline at the end of the string (but not
- before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored
- if PCRE2_MULTILINE is set. There is no equivalent to this option in
+ If this bit is set, a dollar metacharacter in the pattern matches only
+ at the end of the subject string. Without this option, a dollar also
+ matches immediately before a newline at the end of the string (but not
+ before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored
+ if PCRE2_MULTILINE is set. There is no equivalent to this option in
Perl, and no way to set it within a pattern.
PCRE2_DOTALL
- If this bit is set, a dot metacharacter in the pattern matches any
- character, including one that indicates a newline. However, it only
+ If this bit is set, a dot metacharacter in the pattern matches any
+ character, including one that indicates a newline. However, it only
ever matches one character, even if newlines are coded as CRLF. Without
this option, a dot does not match when the current position in the sub-
- ject is at a newline. This option is equivalent to Perl's /s option,
+ ject is at a newline. This option is equivalent to Perl's /s option,
and it can be changed within a pattern by a (?s) option setting. A neg-
- ative class such as [^a] always matches newline characters, and the \N
- escape sequence always matches a non-newline character, independent of
+ ative class such as [^a] always matches newline characters, and the \N
+ escape sequence always matches a non-newline character, independent of
the setting of PCRE2_DOTALL.
PCRE2_DUPNAMES
- If this bit is set, names used to identify capture groups need not be
- unique. This can be helpful for certain types of pattern when it is
- known that only one instance of the named group can ever be matched.
- There are more details of named capture groups below; see also the
- pcre2pattern documentation.
+ If this bit is set, names used to identify capturing subpatterns need
+ not be unique. This can be helpful for certain types of pattern when it
+ is known that only one instance of the named subpattern can ever be
+ matched. There are more details of named subpatterns below; see also
+ the pcre2pattern documentation.
PCRE2_ENDANCHORED
- If this bit is set, the end of any pattern match must be right at the
+ If this bit is set, the end of any pattern match must be right at the
end of the string being searched (the "subject string"). If the pattern
match succeeds by reaching (*ACCEPT), but does not reach the end of the
- subject, the match fails at the current starting point. For unanchored
- patterns, a new match is then tried at the next starting point. How-
+ subject, the match fails at the current starting point. For unanchored
+ patterns, a new match is then tried at the next starting point. How-
ever, if the match succeeds by reaching the end of the pattern, but not
- the end of the subject, backtracking occurs and an alternative match
+ the end of the subject, backtracking occurs and an alternative match
may be found. Consider these two patterns:
.(*ACCEPT)|..
.|..
- If matched against "abc" with PCRE2_ENDANCHORED set, the first matches
- "c" whereas the second matches "bc". The effect of PCRE2_ENDANCHORED
- can also be achieved by appropriate constructs in the pattern itself,
+ If matched against "abc" with PCRE2_ENDANCHORED set, the first matches
+ "c" whereas the second matches "bc". The effect of PCRE2_ENDANCHORED
+ can also be achieved by appropriate constructs in the pattern itself,
which is the only way to do it in Perl.
For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only
- to the first (that is, the longest) matched string. Other parallel
- matches, which are necessarily substrings of the first one, must obvi-
+ to the first (that is, the longest) matched string. Other parallel
+ matches, which are necessarily substrings of the first one, must obvi-
ously end before the end of the subject.
PCRE2_EXTENDED
- If this bit is set, most white space characters in the pattern are
- totally ignored except when escaped or inside a character class. How-
- ever, white space is not allowed within sequences such as (?> that
- introduce various parenthesized groups, nor within numerical quanti-
- fiers such as {1,3}. Ignorable white space is permitted between an item
- and a following quantifier and between a quantifier and a following +
- that indicates possessiveness. PCRE2_EXTENDED is equivalent to Perl's
- /x option, and it can be changed within a pattern by a (?x) option set-
- ting.
-
- When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog-
- nizes as white space only those characters with code points less than
+ If this bit is set, most white space characters in the pattern are
+ totally ignored except when escaped or inside a character class. How-
+ ever, white space is not allowed within sequences such as (?> that
+ introduce various parenthesized subpatterns, nor within numerical quan-
+ tifiers such as {1,3}. Ignorable white space is permitted between an
+ item and a following quantifier and between a quantifier and a follow-
+ ing + that indicates possessiveness. PCRE2_EXTENDED is equivalent to
+ Perl's /x option, and it can be changed within a pattern by a (?x)
+ option setting.
+
+ When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog-
+ nizes as white space only those characters with code points less than
256 that are flagged as white space in its low-character table. The ta-
ble is normally created by pcre2_maketables(), which uses the isspace()
- function to identify space characters. In most ASCII environments, the
- relevant characters are those with code points 0x0009 (tab), 0x000A
- (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage
+ function to identify space characters. In most ASCII environments, the
+ relevant characters are those with code points 0x0009 (tab), 0x000A
+ (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage
return), and 0x0020 (space).
When PCRE2 is compiled with Unicode support, in addition to these char-
- acters, five more Unicode "Pattern White Space" characters are recog-
+ acters, five more Unicode "Pattern White Space" characters are recog-
nized by PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-
- right mark), U+200F (right-to-left mark), U+2028 (line separator), and
- U+2029 (paragraph separator). This set of characters is the same as
- recognized by Perl's /x option. Note that the horizontal and vertical
- space characters that are matched by the \h and \v escapes in patterns
+ right mark), U+200F (right-to-left mark), U+2028 (line separator), and
+ U+2029 (paragraph separator). This set of characters is the same as
+ recognized by Perl's /x option. Note that the horizontal and vertical
+ space characters that are matched by the \h and \v escapes in patterns
are a much bigger set.
- As well as ignoring most white space, PCRE2_EXTENDED also causes char-
- acters between an unescaped # outside a character class and the next
- newline, inclusive, to be ignored, which makes it possible to include
+ As well as ignoring most white space, PCRE2_EXTENDED also causes char-
+ acters between an unescaped # outside a character class and the next
+ newline, inclusive, to be ignored, which makes it possible to include
comments inside complicated patterns. Note that the end of this type of
- comment is a literal newline sequence in the pattern; escape sequences
+ comment is a literal newline sequence in the pattern; escape sequences
that happen to represent a newline do not count.
Which characters are interpreted as newlines can be specified by a set-
- ting in the compile context that is passed to pcre2_compile() or by a
- special sequence at the start of the pattern, as described in the sec-
- tion entitled "Newline conventions" in the pcre2pattern documentation.
+ ting in the compile context that is passed to pcre2_compile() or by a
+ special sequence at the start of the pattern, as described in the sec-
+ tion entitled "Newline conventions" in the pcre2pattern documentation.
A default is defined when PCRE2 is built.
PCRE2_EXTENDED_MORE
- This option has the effect of PCRE2_EXTENDED, but, in addition,
- unescaped space and horizontal tab characters are ignored inside a
- character class. Note: only these two characters are ignored, not the
- full set of pattern white space characters that are ignored outside a
+ This option has the effect of PCRE2_EXTENDED, but, in addition,
+ unescaped space and horizontal tab characters are ignored inside a
+ character class. Note: only these two characters are ignored, not the
+ full set of pattern white space characters that are ignored outside a
character class. PCRE2_EXTENDED_MORE is equivalent to Perl's /xx
- option, and it can be changed within a pattern by a (?xx) option set-
+ option, and it can be changed within a pattern by a (?xx) option set-
ting.
PCRE2_FIRSTLINE
If this option is set, the start of an unanchored pattern match must be
- before or at the first newline in the subject string following the
- start of matching, though the matched text may continue over the new-
+ before or at the first newline in the subject string following the
+ start of matching, though the matched text may continue over the new-
line. If startoffset is non-zero, the limiting newline is not necessar-
- ily the first newline in the subject. For example, if the subject
+ ily the first newline in the subject. For example, if the subject
string is "abc\nxyz" (where \n represents a single-character newline) a
- pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is
- greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
- general limiting facility. If PCRE2_FIRSTLINE is set with an offset
- limit, a match must occur in the first line and also within the offset
+ pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is
+ greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
+ general limiting facility. If PCRE2_FIRSTLINE is set with an offset
+ limit, a match must occur in the first line and also within the offset
limit. In other words, whichever limit comes first is used.
PCRE2_LITERAL
If this option is set, all meta-characters in the pattern are disabled,
- and it is treated as a literal string. Matching literal strings with a
+ and it is treated as a literal string. Matching literal strings with a
regular expression engine is not the most efficient way of doing it. If
- you are doing a lot of literal matching and are worried about effi-
+ you are doing a lot of literal matching and are worried about effi-
ciency, you should consider using other approaches. The only other main
options that are allowed with PCRE2_LITERAL are: PCRE2_ANCHORED,
PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, PCRE2_CASELESS, PCRE2_FIRSTLINE,
PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_UTF, and
- PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and
- PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an
+ PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and
+ PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an
error.
PCRE2_MATCH_UNSET_BACKREF
- If this option is set, a backreference to an unset capture group
- matches an empty string (by default this causes the current matching
- alternative to fail). A pattern such as (\1)(a) succeeds when this
- option is set (assuming it can find an "a" in the subject), whereas it
- fails by default, for Perl compatibility. Setting this option makes
+ If this option is set, a backreference to an unset subpattern group
+ matches an empty string (by default this causes the current matching
+ alternative to fail). A pattern such as (\1)(a) succeeds when this
+ option is set (assuming it can find an "a" in the subject), whereas it
+ fails by default, for Perl compatibility. Setting this option makes
PCRE2 behave more like ECMAscript (aka JavaScript).
PCRE2_MULTILINE
- By default, for the purposes of matching "start of line" and "end of
- line", PCRE2 treats the subject string as consisting of a single line
- of characters, even if it actually contains newlines. The "start of
- line" metacharacter (^) matches only at the start of the string, and
- the "end of line" metacharacter ($) matches only at the end of the
+ By default, for the purposes of matching "start of line" and "end of
+ line", PCRE2 treats the subject string as consisting of a single line
+ of characters, even if it actually contains newlines. The "start of
+ line" metacharacter (^) matches only at the start of the string, and
+ the "end of line" metacharacter ($) matches only at the end of the
string, or before a terminating newline (except when PCRE2_DOL-
- LAR_ENDONLY is set). Note, however, that unless PCRE2_DOTALL is set,
+ LAR_ENDONLY is set). Note, however, that unless PCRE2_DOTALL is set,
the "any character" metacharacter (.) does not match at a newline. This
behaviour (for ^, $, and dot) is the same as Perl.
- When PCRE2_MULTILINE is set, the "start of line" and "end of line"
- constructs match immediately following or immediately before internal
- newlines in the subject string, respectively, as well as at the very
- start and end. This is equivalent to Perl's /m option, and it can be
+ When PCRE2_MULTILINE is set, the "start of line" and "end of line"
+ constructs match immediately following or immediately before internal
+ newlines in the subject string, respectively, as well as at the very
+ start and end. This is equivalent to Perl's /m option, and it can be
changed within a pattern by a (?m) option setting. Note that the "start
of line" metacharacter does not match after a newline at the end of the
- subject, for compatibility with Perl. However, you can change this by
- setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a
- subject string, or no occurrences of ^ or $ in a pattern, setting
+ subject, for compatibility with Perl. However, you can change this by
+ setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a
+ subject string, or no occurrences of ^ or $ in a pattern, setting
PCRE2_MULTILINE has no effect.
PCRE2_NEVER_BACKSLASH_C
- This option locks out the use of \C in the pattern that is being com-
- piled. This escape can cause unpredictable behaviour in UTF-8 or
- UTF-16 modes, because it may leave the current matching point in the
- middle of a multi-code-unit character. This option may be useful in
- applications that process patterns from external sources. Note that
+ This option locks out the use of \C in the pattern that is being com-
+ piled. This escape can cause unpredictable behaviour in UTF-8 or
+ UTF-16 modes, because it may leave the current matching point in the
+ middle of a multi-code-unit character. This option may be useful in
+ applications that process patterns from external sources. Note that
there is also a build-time option that permanently locks out the use of
\C.
PCRE2_NEVER_UCP
- This option locks out the use of Unicode properties for handling \B,
+ This option locks out the use of Unicode properties for handling \B,
\b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as
- described for the PCRE2_UCP option below. In particular, it prevents
- the creator of the pattern from enabling this facility by starting the
- pattern with (*UCP). This option may be useful in applications that
+ described for the PCRE2_UCP option below. In particular, it prevents
+ the creator of the pattern from enabling this facility by starting the
+ pattern with (*UCP). This option may be useful in applications that
process patterns from external sources. The option combination
PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
PCRE2_NEVER_UTF
- This option locks out interpretation of the pattern as UTF-8, UTF-16,
+ This option locks out interpretation of the pattern as UTF-8, UTF-16,
or UTF-32, depending on which library is in use. In particular, it pre-
- vents the creator of the pattern from switching to UTF interpretation
- by starting the pattern with (*UTF). This option may be useful in
- applications that process patterns from external sources. The combina-
+ vents the creator of the pattern from switching to UTF interpretation
+ by starting the pattern with (*UTF). This option may be useful in
+ applications that process patterns from external sources. The combina-
tion of PCRE2_UTF and PCRE2_NEVER_UTF causes an error.
PCRE2_NO_AUTO_CAPTURE
If this option is set, it disables the use of numbered capturing paren-
- theses in the pattern. Any opening parenthesis that is not followed by
- ? behaves as if it were followed by ?: but named parentheses can still
+ theses in the pattern. Any opening parenthesis that is not followed by
+ ? behaves as if it were followed by ?: but named parentheses can still
be used for capturing (and they acquire numbers in the usual way). This
- is the same as Perl's /n option. Note that, when this option is set,
- references to capture groups (backreferences or recursion/subroutine
- calls) may only refer to named groups, though the reference can be by
+ is the same as Perl's /n option. Note that, when this option is set,
+ references to capturing groups (backreferences or recursion/subroutine
+ calls) may only refer to named groups, though the reference can be by
name or by number.
PCRE2_NO_AUTO_POSSESS
If this option is set, it disables "auto-possessification", which is an
- optimization that, for example, turns a+b into a++b in order to avoid
- backtracks into a+ that can never be successful. However, if callouts
- are in use, auto-possessification means that some callouts are never
+ optimization that, for example, turns a+b into a++b in order to avoid
+ backtracks into a+ that can never be successful. However, if callouts
+ are in use, auto-possessification means that some callouts are never
taken. You can set this option if you want the matching functions to do
- a full unoptimized search and run all the callouts, but it is mainly
+ a full unoptimized search and run all the callouts, but it is mainly
provided for testing purposes.
PCRE2_NO_DOTSTAR_ANCHOR
If this option is set, it disables an optimization that is applied when
- .* is the first significant item in a top-level branch of a pattern,
- and all the other branches also start with .* or with \A or \G or ^.
- The optimization is automatically disabled for .* if it is inside an
- atomic group or a capture group that is the subject of a backreference,
- or if the pattern contains (*PRUNE) or (*SKIP). When the optimization
- is not disabled, such a pattern is automatically anchored if
+ .* is the first significant item in a top-level branch of a pattern,
+ and all the other branches also start with .* or with \A or \G or ^.
+ The optimization is automatically disabled for .* if it is inside an
+ atomic group or a capturing group that is the subject of a backrefer-
+ ence, or if the pattern contains (*PRUNE) or (*SKIP). When the opti-
+ mization is not disabled, such a pattern is automatically anchored if
PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set
- for any ^ items. Otherwise, the fact that any match must start either
- at the start of the subject or following a newline is remembered. Like
+ for any ^ items. Otherwise, the fact that any match must start either
+ at the start of the subject or following a newline is remembered. Like
other optimizations, this can cause callouts to be skipped.
PCRE2_NO_START_OPTIMIZE
- This is an option whose main effect is at matching time. It does not
+ This is an option whose main effect is at matching time. It does not
change what pcre2_compile() generates, but it does affect the output of
the JIT compiler.
- There are a number of optimizations that may occur at the start of a
- match, in order to speed up the process. For example, if it is known
- that an unanchored match must start with a specific code unit value,
- the matching code searches the subject for that value, and fails imme-
- diately if it cannot find it, without actually running the main match-
- ing function. This means that a special item such as (*COMMIT) at the
- start of a pattern is not considered until after a suitable starting
- point for the match has been found. Also, when callouts or (*MARK)
- items are in use, these "start-up" optimizations can cause them to be
- skipped if the pattern is never actually used. The start-up optimiza-
- tions are in effect a pre-scan of the subject that takes place before
+ There are a number of optimizations that may occur at the start of a
+ match, in order to speed up the process. For example, if it is known
+ that an unanchored match must start with a specific code unit value,
+ the matching code searches the subject for that value, and fails imme-
+ diately if it cannot find it, without actually running the main match-
+ ing function. This means that a special item such as (*COMMIT) at the
+ start of a pattern is not considered until after a suitable starting
+ point for the match has been found. Also, when callouts or (*MARK)
+ items are in use, these "start-up" optimizations can cause them to be
+ skipped if the pattern is never actually used. The start-up optimiza-
+ tions are in effect a pre-scan of the subject that takes place before
the pattern is run.
The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
- possibly causing performance to suffer, but ensuring that in cases
- where the result is "no match", the callouts do occur, and that items
+ possibly causing performance to suffer, but ensuring that in cases
+ where the result is "no match", the callouts do occur, and that items
such as (*COMMIT) and (*MARK) are considered at every possible starting
position in the subject string.
- Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching
+ Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching
operation. Consider the pattern
(*COMMIT)ABC
- When this is compiled, PCRE2 records the fact that a match must start
- with the character "A". Suppose the subject string is "DEFABC". The
- start-up optimization scans along the subject, finds "A" and runs the
- first match attempt from there. The (*COMMIT) item means that the pat-
- tern must match the current starting position, which in this case, it
- does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE
- set, the initial scan along the subject string does not happen. The
- first match attempt is run starting from "D" and when this fails,
- (*COMMIT) prevents any further matches being tried, so the overall
+ When this is compiled, PCRE2 records the fact that a match must start
+ with the character "A". Suppose the subject string is "DEFABC". The
+ start-up optimization scans along the subject, finds "A" and runs the
+ first match attempt from there. The (*COMMIT) item means that the pat-
+ tern must match the current starting position, which in this case, it
+ does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE
+ set, the initial scan along the subject string does not happen. The
+ first match attempt is run starting from "D" and when this fails,
+ (*COMMIT) prevents any further matches being tried, so the overall
result is "no match".
- There are also other start-up optimizations. For example, a minimum
+ There are also other start-up optimizations. For example, a minimum
length for the subject may be recorded. Consider the pattern
(*MARK:A)(X|Y)
- The minimum length for a match is one character. If the subject is
+ The minimum length for a match is one character. If the subject is
"ABC", there will be attempts to match "ABC", "BC", and "C". An attempt
to match an empty string at the end of the subject does not take place,
- because PCRE2 knows that the subject is now too short, and so the
- (*MARK) is never encountered. In this case, the optimization does not
+ because PCRE2 knows that the subject is now too short, and so the
+ (*MARK) is never encountered. In this case, the optimization does not
affect the overall match result, which is still "no match", but it does
affect the auxiliary information that is returned.
PCRE2_NO_UTF_CHECK
- When PCRE2_UTF is set, the validity of the pattern as a UTF string is
- automatically checked. There are discussions about the validity of
- UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode
- document. If an invalid UTF sequence is found, pcre2_compile() returns
+ When PCRE2_UTF is set, the validity of the pattern as a UTF string is
+ automatically checked. There are discussions about the validity of
+ UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode
+ document. If an invalid UTF sequence is found, pcre2_compile() returns
a negative error code.
- If you know that your pattern is a valid UTF string, and you want to
- skip this check for performance reasons, you can set the
- PCRE2_NO_UTF_CHECK option. When it is set, the effect of passing an
+ If you know that your pattern is a valid UTF string, and you want to
+ skip this check for performance reasons, you can set the
+ PCRE2_NO_UTF_CHECK option. When it is set, the effect of passing an
invalid UTF string as a pattern is undefined. It may cause your program
to crash or loop.
Note that this option can also be passed to pcre2_match() and
- pcre2_dfa_match(), to suppress UTF validity checking of the subject
+ pcre2_dfa_match(), to suppress UTF validity checking of the subject
string.
Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis-
- able the error that is given if an escape sequence for an invalid Uni-
- code code point is encountered in the pattern. In particular, the so-
- called "surrogate" code points (0xd800 to 0xdfff) are invalid. If you
- want to allow escape sequences such as \x{d800} you can set the
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described in the
- section entitled "Extra compile options" below. However, this is pos-
+ able the error that is given if an escape sequence for an invalid Uni-
+ code code point is encountered in the pattern. In particular, the so-
+ called "surrogate" code points (0xd800 to 0xdfff) are invalid. If you
+ want to allow escape sequences such as \x{d800} you can set the
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described in the
+ section entitled "Extra compile options" below. However, this is pos-
sible only in UTF-8 and UTF-32 modes, because these values are not rep-
resentable in UTF-16.
PCRE2_UCP
This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
- \w, and some of the POSIX character classes. By default, only ASCII
- characters are recognized, but if PCRE2_UCP is set, Unicode properties
- are used instead to classify characters. More details are given in the
+ \w, and some of the POSIX character classes. By default, only ASCII
+ characters are recognized, but if PCRE2_UCP is set, Unicode properties
+ are used instead to classify characters. More details are given in the
section on generic character types in the pcre2pattern page. If you set
- PCRE2_UCP, matching one of the items it affects takes much longer. The
- option is available only if PCRE2 has been compiled with Unicode sup-
+ PCRE2_UCP, matching one of the items it affects takes much longer. The
+ option is available only if PCRE2 has been compiled with Unicode sup-
port (which is the default).
PCRE2_UNGREEDY
- This option inverts the "greediness" of the quantifiers so that they
- are not greedy by default, but become greedy if followed by "?". It is
- not compatible with Perl. It can also be set by a (?U) option setting
+ This option inverts the "greediness" of the quantifiers so that they
+ are not greedy by default, but become greedy if followed by "?". It is
+ not compatible with Perl. It can also be set by a (?U) option setting
within the pattern.
PCRE2_USE_OFFSET_LIMIT
This option must be set for pcre2_compile() if pcre2_set_offset_limit()
- is going to be used to set a non-default offset limit in a match con-
- text for matches that use this pattern. An error is generated if an
- offset limit is set without this option. For more details, see the
- description of pcre2_set_offset_limit() in the section that describes
+ is going to be used to set a non-default offset limit in a match con-
+ text for matches that use this pattern. An error is generated if an
+ offset limit is set without this option. For more details, see the
+ description of pcre2_set_offset_limit() in the section that describes
match contexts. See also the PCRE2_FIRSTLINE option above.
PCRE2_UTF
- This option causes PCRE2 to regard both the pattern and the subject
- strings that are subsequently processed as strings of UTF characters
- instead of single-code-unit strings. It is available when PCRE2 is
- built to include Unicode support (which is the default). If Unicode
- support is not available, the use of this option provokes an error.
- Details of how PCRE2_UTF changes the behaviour of PCRE2 are given in
- the pcre2unicode page. In particular, note that it changes the way
+ This option causes PCRE2 to regard both the pattern and the subject
+ strings that are subsequently processed as strings of UTF characters
+ instead of single-code-unit strings. It is available when PCRE2 is
+ built to include Unicode support (which is the default). If Unicode
+ support is not available, the use of this option provokes an error.
+ Details of how PCRE2_UTF changes the behaviour of PCRE2 are given in
+ the pcre2unicode page. In particular, note that it changes the way
PCRE2_CASELESS handles characters with code points greater than 127.
Extra compile options
- The option bits that can be set in a compile context by calling the
- pcre2_set_compile_extra_options() function are as follows:
+ Unlike the main compile-time options, the extra options are not saved
+ with the compiled pattern. The option bits that can be set in a compile
+ context by calling the pcre2_set_compile_extra_options() function are
+ as follows:
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
- This option applies when compiling a pattern in UTF-8 or UTF-32 mode.
- It is forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode
+ This option applies when compiling a pattern in UTF-8 or UTF-32 mode.
+ It is forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode
"surrogate" code points in the range 0xd800 to 0xdfff are used in pairs
- in UTF-16 to encode code points with values in the range 0x10000 to
- 0x10ffff. The surrogates cannot therefore be represented in UTF-16.
+ in UTF-16 to encode code points with values in the range 0x10000 to
+ 0x10ffff. The surrogates cannot therefore be represented in UTF-16.
They can be represented in UTF-8 and UTF-32, but are defined as invalid
- code points, and cause errors if encountered in a UTF-8 or UTF-32
+ code points, and cause errors if encountered in a UTF-8 or UTF-32
string that is being checked for validity by PCRE2.
- These values also cause errors if encountered in escape sequences such
+ These values also cause errors if encountered in escape sequences such
as \x{d912} within a pattern. However, it seems that some applications,
- when using PCRE2 to check for unwanted characters in UTF-8 strings,
- explicitly test for the surrogates using escape sequences. The
- PCRE2_NO_UTF_CHECK option does not disable the error that occurs,
- because it applies only to the testing of input strings for UTF valid-
+ when using PCRE2 to check for unwanted characters in UTF-8 strings,
+ explicitly test for the surrogates using escape sequences. The
+ PCRE2_NO_UTF_CHECK option does not disable the error that occurs,
+ because it applies only to the testing of input strings for UTF valid-
ity.
- If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surro-
- gate code point values in UTF-8 and UTF-32 patterns no longer provoke
- errors and are incorporated in the compiled pattern. However, they can
- only match subject characters if the matching function is called with
+ If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surro-
+ gate code point values in UTF-8 and UTF-32 patterns no longer provoke
+ errors and are incorporated in the compiled pattern. However, they can
+ only match subject characters if the matching function is called with
PCRE2_NO_UTF_CHECK set.
- PCRE2_EXTRA_ALT_BSUX
-
- The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and
- \x in the way that ECMAscript (aka JavaScript) does. Additional func-
- tionality was defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has
- the effect of PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..}
- as a hexadecimal character code, where hhh.. is any number of hexadeci-
- mal digits.
-
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
This is a dangerous option. Use with care. By default, an unrecognized
@@ -1863,41 +1831,28 @@ COMPILING A PATTERN
Perl.
If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
- pcre2_compile(), all unrecognized or malformed escape sequences are
+ pcre2_compile(), all unrecognized or erroneous escape sequences are
treated as single-character escapes. For example, \j is a literal "j"
and \x{2z} is treated as the literal string "x{2z}". Setting this
option means that typos in patterns may go undetected and have unex-
- pected results. Also note that a sequence such as [\N{] is interpreted
- as a malformed attempt at [\N{...}] and so is treated as [N{] whereas
- [\N] gives an error because an unqualified \N is a valid escape
- sequence but is not supported in a character class. To reiterate: this
- is a dangerous option. Use with great care.
-
- PCRE2_EXTRA_ESCAPED_CR_IS_LF
-
- There are some legacy applications where the escape sequence \r in a
- pattern is expected to match a newline. If this option is set, \r in a
- pattern is converted to \n so that it matches a LF (linefeed) instead
- of a CR (carriage return) character. The option does not affect a lit-
- eral CR in the pattern, nor does it affect CR specified as an explicit
- code point such as \x{0D}.
+ pected results. This is a dangerous option. Use with care.
PCRE2_EXTRA_MATCH_LINE
- This option is provided for use by the -x option of pcre2grep. It
- causes the pattern only to match complete lines. This is achieved by
- automatically inserting the code for "^(?:" at the start of the com-
- piled pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set,
- the matched line may be in the middle of the subject string. This
+ This option is provided for use by the -x option of pcre2grep. It
+ causes the pattern only to match complete lines. This is achieved by
+ automatically inserting the code for "^(?:" at the start of the com-
+ piled pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set,
+ the matched line may be in the middle of the subject string. This
option can be used with PCRE2_LITERAL.
PCRE2_EXTRA_MATCH_WORD
- This option is provided for use by the -w option of pcre2grep. It
- causes the pattern only to match strings that have a word boundary at
- the start and the end. This is achieved by automatically inserting the
- code for "\b(?:" at the start of the compiled pattern and ")\b" at the
- end. The option may be used with PCRE2_LITERAL. However, it is ignored
+ This option is provided for use by the -w option of pcre2grep. It
+ causes the pattern only to match strings that have a word boundary at
+ the start and the end. This is achieved by automatically inserting the
+ code for "\b(?:" at the start of the compiled pattern and ")\b" at the
+ end. The option may be used with PCRE2_LITERAL. However, it is ignored
if PCRE2_EXTRA_MATCH_LINE is also set.
@@ -1920,53 +1875,53 @@ JUST-IN-TIME (JIT) COMPILATION
void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
- These functions provide support for JIT compilation, which, if the
- just-in-time compiler is available, further processes a compiled pat-
+ These functions provide support for JIT compilation, which, if the
+ just-in-time compiler is available, further processes a compiled pat-
tern into machine code that executes much faster than the pcre2_match()
- interpretive matching function. Full details are given in the pcre2jit
+ interpretive matching function. Full details are given in the pcre2jit
documentation.
- JIT compilation is a heavyweight optimization. It can take some time
- for patterns to be analyzed, and for one-off matches and simple pat-
- terns the benefit of faster execution might be offset by a much slower
- compilation time. Most (but not all) patterns can be optimized by the
+ JIT compilation is a heavyweight optimization. It can take some time
+ for patterns to be analyzed, and for one-off matches and simple pat-
+ terns the benefit of faster execution might be offset by a much slower
+ compilation time. Most (but not all) patterns can be optimized by the
JIT compiler.
LOCALE SUPPORT
- PCRE2 handles caseless matching, and determines whether characters are
- letters, digits, or whatever, by reference to a set of tables, indexed
- by character code point. This applies only to characters whose code
- points are less than 256. By default, higher-valued code points never
- match escapes such as \w or \d. However, if PCRE2 is built with Uni-
+ PCRE2 handles caseless matching, and determines whether characters are
+ letters, digits, or whatever, by reference to a set of tables, indexed
+ by character code point. This applies only to characters whose code
+ points are less than 256. By default, higher-valued code points never
+ match escapes such as \w or \d. However, if PCRE2 is built with Uni-
code support, all characters can be tested with \p and \P, or, alterna-
- tively, the PCRE2_UCP option can be set when a pattern is compiled;
- this causes \w and friends to use Unicode property support instead of
+ tively, the PCRE2_UCP option can be set when a pattern is compiled;
+ this causes \w and friends to use Unicode property support instead of
the built-in tables.
- The use of locales with Unicode is discouraged. If you are handling
- characters with code points greater than 127, you should either use
+ The use of locales with Unicode is discouraged. If you are handling
+ characters with code points greater than 127, you should either use
Unicode support, or use locales, but not try to mix the two.
- PCRE2 contains an internal set of character tables that are used by
- default. These are sufficient for many applications. Normally, the
+ PCRE2 contains an internal set of character tables that are used by
+ default. These are sufficient for many applications. Normally, the
internal tables recognize only ASCII characters. However, when PCRE2 is
built, it is possible to cause the internal tables to be rebuilt in the
default "C" locale of the local system, which may cause them to be dif-
ferent.
- The internal tables can be overridden by tables supplied by the appli-
- cation that calls PCRE2. These may be created in a different locale
- from the default. As more and more applications change to using Uni-
+ The internal tables can be overridden by tables supplied by the appli-
+ cation that calls PCRE2. These may be created in a different locale
+ from the default. As more and more applications change to using Uni-
code, the need for this locale support is expected to die away.
- External tables are built by calling the pcre2_maketables() function,
- in the relevant locale. The result can be passed to pcre2_compile() as
- often as necessary, by creating a compile context and calling
- pcre2_set_character_tables() to set the tables pointer therein. For
- example, to build and use tables that are appropriate for the French
- locale (where accented characters with values greater than 127 are
+ External tables are built by calling the pcre2_maketables() function,
+ in the relevant locale. The result can be passed to pcre2_compile() as
+ often as necessary, by creating a compile context and calling
+ pcre2_set_character_tables() to set the tables pointer therein. For
+ example, to build and use tables that are appropriate for the French
+ locale (where accented characters with values greater than 127 are
treated as letters), the following code could be used:
setlocale(LC_CTYPE, "fr_FR");
@@ -1975,15 +1930,15 @@ LOCALE SUPPORT
pcre2_set_character_tables(ccontext, tables);
re = pcre2_compile(..., ccontext);
- The locale name "fr_FR" is used on Linux and other Unix-like systems;
- if you are using Windows, the name for the French locale is "french".
- It is the caller's responsibility to ensure that the memory containing
+ The locale name "fr_FR" is used on Linux and other Unix-like systems;
+ if you are using Windows, the name for the French locale is "french".
+ It is the caller's responsibility to ensure that the memory containing
the tables remains available for as long as it is needed.
The pointer that is passed (via the compile context) to pcre2_compile()
- is saved with the compiled pattern, and the same tables are used by
- pcre2_match() and pcre2_dfa_match(). Thus, for any single pattern, com-
- pilation and matching both happen in the same locale, but different
+ is saved with the compiled pattern, and the same tables are used by
+ pcre2_match() and pcre2_dfa_match(). Thus, for any single pattern, com-
+ pilation and matching both happen in the same locale, but different
patterns can be processed in different locales.
@@ -1991,13 +1946,13 @@ INFORMATION ABOUT A COMPILED PATTERN
int pcre2_pattern_info(const pcre2 *code, uint32_t what, void *where);
- The pcre2_pattern_info() function returns general information about a
+ The pcre2_pattern_info() function returns general information about a
compiled pattern. For information about callouts, see the next section.
- The first argument for pcre2_pattern_info() is a pointer to the com-
+ The first argument for pcre2_pattern_info() is a pointer to the com-
piled pattern. The second argument specifies which piece of information
- is required, and the third argument is a pointer to a variable to
- receive the data. If the third argument is NULL, the first argument is
- ignored, and the function returns the size in bytes of the variable
+ is required, and the third argument is a pointer to a variable to
+ receive the data. If the third argument is NULL, the first argument is
+ ignored, and the function returns the size in bytes of the variable
that is required for the information requested. Otherwise, the yield of
the function is zero for success, or one of the following negative num-
bers:
@@ -2007,9 +1962,9 @@ INFORMATION ABOUT A COMPILED PATTERN
PCRE2_ERROR_BADOPTION the value of what was invalid
PCRE2_ERROR_UNSET the requested field is not set
- The "magic number" is placed at the start of each compiled pattern as
- a simple check against passing an arbitrary memory pointer. Here is a
- typical call of pcre2_pattern_info(), to obtain the length of the com-
+ The "magic number" is placed at the start of each compiled pattern as
+ a simple check against passing an arbitrary memory pointer. Here is a
+ typical call of pcre2_pattern_info(), to obtain the length of the com-
piled pattern:
int rc;
@@ -2027,22 +1982,22 @@ INFORMATION ABOUT A COMPILED PATTERN
PCRE2_INFO_EXTRAOPTIONS
Return copies of the pattern's options. The third argument should point
- to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the
- options that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP-
- TIONS returns the compile options as modified by any top-level (*XXX)
- option settings such as (*UTF) at the start of the pattern itself.
- PCRE2_INFO_EXTRAOPTIONS returns the extra options that were set in the
- compile context by calling the pcre2_set_compile_extra_options() func-
+ to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the
+ options that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP-
+ TIONS returns the compile options as modified by any top-level (*XXX)
+ option settings such as (*UTF) at the start of the pattern itself.
+ PCRE2_INFO_EXTRAOPTIONS returns the extra options that were set in the
+ compile context by calling the pcre2_set_compile_extra_options() func-
tion.
- For example, if the pattern /(*UTF)abc/ is compiled with the
- PCRE2_EXTENDED option, the result for PCRE2_INFO_ALLOPTIONS is
- PCRE2_EXTENDED and PCRE2_UTF. Option settings such as (?i) that can
- change within a pattern do not affect the result of PCRE2_INFO_ALLOP-
+ For example, if the pattern /(*UTF)abc/ is compiled with the
+ PCRE2_EXTENDED option, the result for PCRE2_INFO_ALLOPTIONS is
+ PCRE2_EXTENDED and PCRE2_UTF. Option settings such as (?i) that can
+ change within a pattern do not affect the result of PCRE2_INFO_ALLOP-
TIONS, even if they appear right at the start of the pattern. (This was
different in some earlier releases.)
- A pattern compiled without PCRE2_ANCHORED is automatically anchored by
+ A pattern compiled without PCRE2_ANCHORED is automatically anchored by
PCRE2 if the first significant item in every top-level branch is one of
the following:
@@ -2051,104 +2006,104 @@ INFORMATION ABOUT A COMPILED PATTERN
\G always
.* sometimes - see below
- When .* is the first significant item, anchoring is possible only when
+ When .* is the first significant item, anchoring is possible only when
all the following are true:
.* is not in an atomic group
- .* is not in a capture group that is the subject
+ .* is not in a capturing group that is the subject
of a backreference
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
PCRE2_NO_DOTSTAR_ANCHOR is not set
- For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in
+ For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in
the options returned for PCRE2_INFO_ALLOPTIONS.
PCRE2_INFO_BACKREFMAX
- Return the number of the highest backreference in the pattern. The
- third argument should point to an uint32_t variable. Named capture
- groups acquire numbers as well as names, and these count towards the
- highest backreference. Backreferences such as \4 or \g{12} match the
- captured characters of the given group, but in addition, the check that
- a capture group is set in a conditional group such as (?(3)a|b) is also
- a backreference. Zero is returned if there are no backreferences.
+ Return the number of the highest backreference in the pattern. The
+ third argument should point to an uint32_t variable. Named subpatterns
+ acquire numbers as well as names, and these count towards the highest
+ backreference. Backreferences such as \4 or \g{12} match the captured
+ characters of the given group, but in addition, the check that a cap-
+ turing group is set in a conditional subpattern such as (?(3)a|b) is
+ also a backreference. Zero is returned if there are no backreferences.
PCRE2_INFO_BSR
- The output is a uint32_t integer whose value indicates what character
- sequences the \R escape sequence matches. A value of PCRE2_BSR_UNICODE
- means that \R matches any Unicode line ending sequence; a value of
+ The output is a uint32_t integer whose value indicates what character
+ sequences the \R escape sequence matches. A value of PCRE2_BSR_UNICODE
+ means that \R matches any Unicode line ending sequence; a value of
PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF.
PCRE2_INFO_CAPTURECOUNT
- Return the highest capture group number in the pattern. In patterns
- where (?| is not used, this is also the total number of capture groups.
- The third argument should point to an uint32_t variable.
+ Return the highest capturing subpattern number in the pattern. In pat-
+ terns where (?| is not used, this is also the total number of capturing
+ subpatterns. The third argument should point to an uint32_t variable.
PCRE2_INFO_DEPTHLIMIT
- If the pattern set a backtracking depth limit by including an item of
- the form (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The
+ If the pattern set a backtracking depth limit by including an item of
+ the form (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The
third argument should point to a uint32_t integer. If no such value has
- been set, the call to pcre2_pattern_info() returns the error
+ been set, the call to pcre2_pattern_info() returns the error
PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
- ing if it is less than the limit set or defaulted by the caller of the
+ ing if it is less than the limit set or defaulted by the caller of the
match function.
PCRE2_INFO_FIRSTBITMAP
- In the absence of a single first code unit for a non-anchored pattern,
- pcre2_compile() may construct a 256-bit table that defines a fixed set
- of values for the first code unit in any match. For example, a pattern
- that starts with [abc] results in a table with three bits set. When
- code unit values greater than 255 are supported, the flag bit for 255
- means "any code unit of value 255 or above". If such a table was con-
- structed, a pointer to it is returned. Otherwise NULL is returned. The
+ In the absence of a single first code unit for a non-anchored pattern,
+ pcre2_compile() may construct a 256-bit table that defines a fixed set
+ of values for the first code unit in any match. For example, a pattern
+ that starts with [abc] results in a table with three bits set. When
+ code unit values greater than 255 are supported, the flag bit for 255
+ means "any code unit of value 255 or above". If such a table was con-
+ structed, a pointer to it is returned. Otherwise NULL is returned. The
third argument should point to a const uint8_t * variable.
PCRE2_INFO_FIRSTCODETYPE
Return information about the first code unit of any matched string, for
- a non-anchored pattern. The third argument should point to an uint32_t
- variable. If there is a fixed first value, for example, the letter "c"
- from a pattern such as (cat|cow|coyote), 1 is returned, and the value
- can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed
- first value, but it is known that a match can occur only at the start
- of the subject or following a newline in the subject, 2 is returned.
+ a non-anchored pattern. The third argument should point to an uint32_t
+ variable. If there is a fixed first value, for example, the letter "c"
+ from a pattern such as (cat|cow|coyote), 1 is returned, and the value
+ can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed
+ first value, but it is known that a match can occur only at the start
+ of the subject or following a newline in the subject, 2 is returned.
Otherwise, and for anchored patterns, 0 is returned.
PCRE2_INFO_FIRSTCODEUNIT
- Return the value of the first code unit of any matched string for a
- pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0.
- The third argument should point to an uint32_t variable. In the 8-bit
- library, the value is always less than 256. In the 16-bit library the
- value can be up to 0xffff. In the 32-bit library in UTF-32 mode the
+ Return the value of the first code unit of any matched string for a
+ pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0.
+ The third argument should point to an uint32_t variable. In the 8-bit
+ library, the value is always less than 256. In the 16-bit library the
+ value can be up to 0xffff. In the 32-bit library in UTF-32 mode the
value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32
mode.
PCRE2_INFO_FRAMESIZE
Return the size (in bytes) of the data frames that are used to remember
- backtracking positions when the pattern is processed by pcre2_match()
- without the use of JIT. The third argument should point to a size_t
+ backtracking positions when the pattern is processed by pcre2_match()
+ without the use of JIT. The third argument should point to a size_t
variable. The frame size depends on the number of capturing parentheses
- in the pattern. Each additional capture group adds two PCRE2_SIZE vari-
- ables.
+ in the pattern. Each additional capturing group adds two PCRE2_SIZE
+ variables.
PCRE2_INFO_HASBACKSLASHC
- Return 1 if the pattern contains any instances of \C, otherwise 0. The
+ Return 1 if the pattern contains any instances of \C, otherwise 0. The
third argument should point to a uint32_t variable.
PCRE2_INFO_HASCRORLF
- Return 1 if the pattern contains any explicit matches for CR or LF
+ Return 1 if the pattern contains any explicit matches for CR or LF
characters, otherwise 0. The third argument should point to a uint32_t
- variable. An explicit match is either a literal CR or LF character, or
- \r or \n or one of the equivalent hexadecimal or octal escape
+ variable. An explicit match is either a literal CR or LF character, or
+ \r or \n or one of the equivalent hexadecimal or octal escape
sequences.
PCRE2_INFO_HEAPLIMIT
@@ -2156,81 +2111,81 @@ INFORMATION ABOUT A COMPILED PATTERN
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu-
ment should point to a uint32_t integer. If no such value has been set,
- the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET.
- Note that this limit will only be used during matching if it is less
+ the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET.
+ Note that this limit will only be used during matching if it is less
than the limit set or defaulted by the caller of the match function.
PCRE2_INFO_JCHANGED
- Return 1 if the (?J) or (?-J) option setting is used in the pattern,
- otherwise 0. The third argument should point to an uint32_t variable.
- (?J) and (?-J) set and unset the local PCRE2_DUPNAMES option, respec-
+ Return 1 if the (?J) or (?-J) option setting is used in the pattern,
+ otherwise 0. The third argument should point to an uint32_t variable.
+ (?J) and (?-J) set and unset the local PCRE2_DUPNAMES option, respec-
tively.
PCRE2_INFO_JITSIZE
- If the compiled pattern was successfully processed by pcre2_jit_com-
- pile(), return the size of the JIT compiled code, otherwise return
+ If the compiled pattern was successfully processed by pcre2_jit_com-
+ pile(), return the size of the JIT compiled code, otherwise return
zero. The third argument should point to a size_t variable.
PCRE2_INFO_LASTCODETYPE
- Returns 1 if there is a rightmost literal code unit that must exist in
- any matched string, other than at its start. The third argument should
- point to an uint32_t variable. If there is no such value, 0 is
- returned. When 1 is returned, the code unit value itself can be
- retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last
- literal value is recorded only if it follows something of variable
- length. For example, for the pattern /^a\d+z\d+/ the returned value is
- 1 (with "z" returned from PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/
+ Returns 1 if there is a rightmost literal code unit that must exist in
+ any matched string, other than at its start. The third argument should
+ point to an uint32_t variable. If there is no such value, 0 is
+ returned. When 1 is returned, the code unit value itself can be
+ retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last
+ literal value is recorded only if it follows something of variable
+ length. For example, for the pattern /^a\d+z\d+/ the returned value is
+ 1 (with "z" returned from PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/
the returned value is 0.
PCRE2_INFO_LASTCODEUNIT
- Return the value of the rightmost literal code unit that must exist in
- any matched string, other than at its start, for a pattern where
+ Return the value of the rightmost literal code unit that must exist in
+ any matched string, other than at its start, for a pattern where
PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argu-
ment should point to a uint32_t variable.
PCRE2_INFO_MATCHEMPTY
- Return 1 if the pattern might match an empty string, otherwise 0. The
- third argument should point to an uint32_t variable. When a pattern
+ Return 1 if the pattern might match an empty string, otherwise 0. The
+ third argument should point to an uint32_t variable. When a pattern
contains recursive subroutine calls it is not always possible to deter-
- mine whether or not it can match an empty string. PCRE2 takes a cau-
+ mine whether or not it can match an empty string. PCRE2 takes a cau-
tious approach and returns 1 in such cases.
PCRE2_INFO_MATCHLIMIT
- If the pattern set a match limit by including an item of the form
- (*LIMIT_MATCH=nnnn) at the start, the value is returned. The third
- argument should point to a uint32_t integer. If no such value has been
- set, the call to pcre2_pattern_info() returns the error
+ If the pattern set a match limit by including an item of the form
+ (*LIMIT_MATCH=nnnn) at the start, the value is returned. The third
+ argument should point to a uint32_t integer. If no such value has been
+ set, the call to pcre2_pattern_info() returns the error
PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
- ing if it is less than the limit set or defaulted by the caller of the
+ ing if it is less than the limit set or defaulted by the caller of the
match function.
PCRE2_INFO_MAXLOOKBEHIND
Return the number of characters (not code units) in the longest lookbe-
- hind assertion in the pattern. The third argument should point to a
- uint32_t integer. This information is useful when doing multi-segment
- matching using the partial matching facilities. Note that the simple
+ hind assertion in the pattern. The third argument should point to a
+ uint32_t integer. This information is useful when doing multi-segment
+ matching using the partial matching facilities. Note that the simple
assertions \b and \B require a one-character lookbehind. \A also regis-
- ters a one-character lookbehind, though it does not actually inspect
- the previous character. This is to ensure that at least one character
- from the old segment is retained when a new segment is processed. Oth-
- erwise, if there are no lookbehinds in the pattern, \A might match
+ ters a one-character lookbehind, though it does not actually inspect
+ the previous character. This is to ensure that at least one character
+ from the old segment is retained when a new segment is processed. Oth-
+ erwise, if there are no lookbehinds in the pattern, \A might match
incorrectly at the start of a second or subsequent segment.
PCRE2_INFO_MINLENGTH
- If a minimum length for matching subject strings was computed, its
- value is returned. Otherwise the returned value is 0. The value is a
- number of characters, which in UTF mode may be different from the num-
- ber of code units. The third argument should point to an uint32_t
- variable. The value is a lower bound to the length of any matching
- string. There may not be any strings of that length that do actually
+ If a minimum length for matching subject strings was computed, its
+ value is returned. Otherwise the returned value is 0. The value is a
+ number of characters, which in UTF mode may be different from the num-
+ ber of code units. The third argument should point to an uint32_t
+ variable. The value is a lower bound to the length of any matching
+ string. There may not be any strings of that length that do actually
match, but every string that does match is at least that long.
PCRE2_INFO_NAMECOUNT
@@ -2238,41 +2193,40 @@ INFORMATION ABOUT A COMPILED PATTERN
PCRE2_INFO_NAMETABLE
PCRE2 supports the use of named as well as numbered capturing parenthe-
- ses. The names are just an additional way of identifying the parenthe-
+ ses. The names are just an additional way of identifying the parenthe-
ses, which still acquire numbers. Several convenience functions such as
- pcre2_substring_get_byname() are provided for extracting captured sub-
- strings by name. It is also possible to extract the data directly, by
- first converting the name to a number in order to access the correct
- pointers in the output vector (described with pcre2_match() below). To
- do the conversion, you need to use the name-to-number map, which is
+ pcre2_substring_get_byname() are provided for extracting captured sub-
+ strings by name. It is also possible to extract the data directly, by
+ first converting the name to a number in order to access the correct
+ pointers in the output vector (described with pcre2_match() below). To
+ do the conversion, you need to use the name-to-number map, which is
described by these three values.
- The map consists of a number of fixed-size entries. PCRE2_INFO_NAME-
- COUNT gives the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives
- the size of each entry in code units; both of these return a uint32_t
+ The map consists of a number of fixed-size entries. PCRE2_INFO_NAME-
+ COUNT gives the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives
+ the size of each entry in code units; both of these return a uint32_t
value. The entry size depends on the length of the longest name.
PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table.
- This is a PCRE2_SPTR pointer to a block of code units. In the 8-bit
- library, the first two bytes of each entry are the number of the cap-
+ This is a PCRE2_SPTR pointer to a block of code units. In the 8-bit
+ library, the first two bytes of each entry are the number of the cap-
turing parenthesis, most significant byte first. In the 16-bit library,
- the pointer points to 16-bit code units, the first of which contains
- the parenthesis number. In the 32-bit library, the pointer points to
- 32-bit code units, the first of which contains the parenthesis number.
+ the pointer points to 16-bit code units, the first of which contains
+ the parenthesis number. In the 32-bit library, the pointer points to
+ 32-bit code units, the first of which contains the parenthesis number.
The rest of the entry is the corresponding name, zero terminated.
- The names are in alphabetical order. If (?| is used to create multiple
- capture groups with the same number, as described in the section on
- duplicate group numbers in the pcre2pattern page, the groups may be
- given the same name, but there is only one entry in the table. Differ-
- ent names for groups of the same number are not permitted.
+ The names are in alphabetical order. If (?| is used to create multiple
+ groups with the same number, as described in the section on duplicate
+ subpattern numbers in the pcre2pattern page, the groups may be given
+ the same name, but there is only one entry in the table. Different
+ names for groups of the same number are not permitted.
- Duplicate names for capture groups with different numbers are permit-
- ted, but only if PCRE2_DUPNAMES is set. They appear in the table in the
- order in which they were found in the pattern. In the absence of (?|
- this is the order of increasing number; when (?| is used this is not
- necessarily the case because later capture groups may have lower num-
- bers.
+ Duplicate names for subpatterns with different numbers are permitted,
+ but only if PCRE2_DUPNAMES is set. They appear in the table in the
+ order in which they were found in the pattern. In the absence of (?|
+ this is the order of increasing number; when (?| is used this is not
+ necessarily the case because later subpatterns may have lower numbers.
As a simple example of the name/number table, consider the following
pattern after compilation by the 8-bit library (assume PCRE2_EXTENDED
@@ -2281,7 +2235,7 @@ INFORMATION ABOUT A COMPILED PATTERN
(?<date> (?<year>(\d\d)?\d\d) -
(?<month>\d\d) - (?<day>\d\d) )
- There are four named capture groups, so the table has four entries, and
+ There are four named subpatterns, so the table has four entries, and
each entry in the table is eight bytes long. The table is as follows,
with non-printing bytes shown in hexadecimal, and undefined bytes shown
as ??:
@@ -2291,7 +2245,7 @@ INFORMATION ABOUT A COMPILED PATTERN
00 04 m o n t h 00
00 02 y e a r 00 ??
- When writing code to extract data from named capture groups using the
+ When writing code to extract data from named subpatterns using the
name-to-number map, remember that the length of the entries is likely
to be different for each compiled pattern.
@@ -2410,16 +2364,13 @@ THE MATCH DATA BLOCK
When one of the matching functions is called, pointers to the compiled
pattern and the subject string are set in the match data block so that
- they can be referenced by the extraction functions after a successful
- match. After running a match, you must not free a compiled pattern or a
- subject string until after all operations on the match data block (for
- that match) have taken place, unless, in the case of the subject
- string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is
- described in the section entitled "Option bits for pcre2_match()"
- below.
+ they can be referenced by the extraction functions. After running a
+ match, you must not free a compiled pattern or a subject string until
+ after all operations on the match data block (for that match) have
+ taken place.
- When a match data block itself is no longer needed, it should be freed
- by calling pcre2_match_data_free(). If this function is called with a
+ When a match data block itself is no longer needed, it should be freed
+ by calling pcre2_match_data_free(). If this function is called with a
NULL argument, it returns immediately, without doing anything.
@@ -2430,15 +2381,15 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext);
- The function pcre2_match() is called to match a subject string against
- a compiled pattern, which is passed in the code argument. You can call
+ The function pcre2_match() is called to match a subject string against
+ a compiled pattern, which is passed in the code argument. You can call
pcre2_match() with the same code argument as many times as you like, in
- order to find multiple matches in the subject string or to match dif-
+ order to find multiple matches in the subject string or to match dif-
ferent subject strings with the same pattern.
- This function is the main matching facility of the library, and it
- operates in a Perl-like manner. For specialist use there is also an
- alternative matching function, which is described below in the section
+ This function is the main matching facility of the library, and it
+ operates in a Perl-like manner. For specialist use there is also an
+ alternative matching function, which is described below in the section
about the pcre2_dfa_match() function.
Here is an example of a simple call to pcre2_match():
@@ -2453,74 +2404,73 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
md, /* the match data block */
NULL); /* a match context; NULL means use defaults */
- If the subject string is zero-terminated, the length can be given as
+ If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less
common matching parameters are to be changed. For details, see the sec-
tion on the match context above.
The string to be matched by pcre2_match()
- The subject string is passed to pcre2_match() as a pointer in subject,
- a length in length, and a starting offset in startoffset. The length
- and offset are in code units, not characters. That is, they are in
- bytes for the 8-bit library, 16-bit code units for the 16-bit library,
- and 32-bit code units for the 32-bit library, whether or not UTF pro-
+ The subject string is passed to pcre2_match() as a pointer in subject,
+ a length in length, and a starting offset in startoffset. The length
+ and offset are in code units, not characters. That is, they are in
+ bytes for the 8-bit library, 16-bit code units for the 16-bit library,
+ and 32-bit code units for the 32-bit library, whether or not UTF pro-
cessing is enabled.
If startoffset is greater than the length of the subject, pcre2_match()
- returns PCRE2_ERROR_BADOFFSET. When the starting offset is zero, the
- search for a match starts at the beginning of the subject, and this is
+ returns PCRE2_ERROR_BADOFFSET. When the starting offset is zero, the
+ search for a match starts at the beginning of the subject, and this is
by far the most common case. In UTF-8 or UTF-16 mode, the starting off-
- set must point to the start of a character, or to the end of the sub-
- ject (in UTF-32 mode, one code unit equals one character, so all off-
- sets are valid). Like the pattern string, the subject may contain
+ set must point to the start of a character, or to the end of the sub-
+ ject (in UTF-32 mode, one code unit equals one character, so all off-
+ sets are valid). Like the pattern string, the subject may contain
binary zeros.
- A non-zero starting offset is useful when searching for another match
- in the same subject by calling pcre2_match() again after a previous
- success. Setting startoffset differs from passing over a shortened
- string and setting PCRE2_NOTBOL in the case of a pattern that begins
+ A non-zero starting offset is useful when searching for another match
+ in the same subject by calling pcre2_match() again after a previous
+ success. Setting startoffset differs from passing over a shortened
+ string and setting PCRE2_NOTBOL in the case of a pattern that begins
with any kind of lookbehind. For example, consider the pattern
\Biss\B
- which finds occurrences of "iss" in the middle of words. (\B matches
- only if the current position in the subject is not a word boundary.)
+ which finds occurrences of "iss" in the middle of words. (\B matches
+ only if the current position in the subject is not a word boundary.)
When applied to the string "Mississipi" the first call to pcre2_match()
- finds the first occurrence. If pcre2_match() is called again with just
- the remainder of the subject, namely "issipi", it does not match,
+ finds the first occurrence. If pcre2_match() is called again with just
+ the remainder of the subject, namely "issipi", it does not match,
because \B is always false at the start of the subject, which is deemed
- to be a word boundary. However, if pcre2_match() is passed the entire
+ to be a word boundary. However, if pcre2_match() is passed the entire
string again, but with startoffset set to 4, it finds the second occur-
- rence of "iss" because it is able to look behind the starting point to
+ rence of "iss" because it is able to look behind the starting point to
discover that it is preceded by a letter.
- Finding all the matches in a subject is tricky when the pattern can
+ Finding all the matches in a subject is tricky when the pattern can
match an empty string. It is possible to emulate Perl's /g behaviour by
- first trying the match again at the same offset, with the
- PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED options, and then if that
- fails, advancing the starting offset and trying an ordinary match
- again. There is some code that demonstrates how to do this in the
- pcre2demo sample program. In the most general case, you have to check
- to see if the newline convention recognizes CRLF as a newline, and if
- so, and the current character is CR followed by LF, advance the start-
+ first trying the match again at the same offset, with the
+ PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED options, and then if that
+ fails, advancing the starting offset and trying an ordinary match
+ again. There is some code that demonstrates how to do this in the
+ pcre2demo sample program. In the most general case, you have to check
+ to see if the newline convention recognizes CRLF as a newline, and if
+ so, and the current character is CR followed by LF, advance the start-
ing offset by two characters instead of one.
If a non-zero starting offset is passed when the pattern is anchored, a
single attempt to match at the given offset is made. This can only suc-
- ceed if the pattern does not require the match to be at the start of
- the subject. In other words, the anchoring must be the result of set-
- ting the PCRE2_ANCHORED option or the use of .* with PCRE2_DOTALL, not
+ ceed if the pattern does not require the match to be at the start of
+ the subject. In other words, the anchoring must be the result of set-
+ ting the PCRE2_ANCHORED option or the use of .* with PCRE2_DOTALL, not
by starting the pattern with ^ or \A.
Option bits for pcre2_match()
The unused bits of the options argument for pcre2_match() must be zero.
- The only bits that may be set are PCRE2_ANCHORED,
- PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL,
- PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT,
- PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their
- action is described below.
+ The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
+ PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+ PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PAR-
+ TIAL_SOFT. Their action is described below.
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not sup-
ported by the just-in-time (JIT) compiler. If it is set, JIT matching
@@ -2536,122 +2486,105 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
unanchored at matching time. Note that setting the option at match time
disables JIT matching.
- PCRE2_COPY_MATCHED_SUBJECT
-
- By default, a pointer to the subject is remembered in the match data
- block so that, after a successful match, it can be referenced by the
- substring extraction functions. This means that the subject's memory
- must not be freed until all such operations are complete. For some
- applications where the lifetime of the subject string is not guaran-
- teed, it may be necessary to make a copy of the subject string, but it
- is wasteful to do this unless the match is successful. After a success-
- ful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the subject is copied
- and the new pointer is remembered in the match data block instead of
- the original subject pointer. The memory allocator that was used for
- the match block itself is used. The copy is automatically freed when
- pcre2_match_data_free() is called to free the match data block. It is
- also automatically freed if the match data block is re-used for another
- match operation.
-
PCRE2_ENDANCHORED
- If the PCRE2_ENDANCHORED option is set, any string that pcre2_match()
- matches must be right at the end of the subject string. Note that set-
+ If the PCRE2_ENDANCHORED option is set, any string that pcre2_match()
+ matches must be right at the end of the subject string. Note that set-
ting the option at match time disables JIT matching.
PCRE2_NOTBOL
This option specifies that the first character of the subject string is not
- the beginning of a line, so the circumflex metacharacter should not
- match before it. Setting this without having set PCRE2_MULTILINE at
+ the beginning of a line, so the circumflex metacharacter should not
+ match before it. Setting this without having set PCRE2_MULTILINE at
compile time causes circumflex never to match. This option affects only
the behaviour of the circumflex metacharacter. It does not affect \A.
PCRE2_NOTEOL
This option specifies that the end of the subject string is not the end
- of a line, so the dollar metacharacter should not match it nor (except
- in multiline mode) a newline immediately before it. Setting this with-
- out having set PCRE2_MULTILINE at compile time causes dollar never to
+ of a line, so the dollar metacharacter should not match it nor (except
+ in multiline mode) a newline immediately before it. Setting this with-
+ out having set PCRE2_MULTILINE at compile time causes dollar never to
match. This option affects only the behaviour of the dollar metacharac-
ter. It does not affect \Z or \z.
PCRE2_NOTEMPTY
An empty string is not considered to be a valid match if this option is
- set. If there are alternatives in the pattern, they are tried. If all
- the alternatives match the empty string, the entire match fails. For
+ set. If there are alternatives in the pattern, they are tried. If all
+ the alternatives match the empty string, the entire match fails. For
example, if the pattern
a?b?
- is applied to a string not beginning with "a" or "b", it matches an
+ is applied to a string not beginning with "a" or "b", it matches an
empty string at the start of the subject. With PCRE2_NOTEMPTY set, this
- match is not valid, so pcre2_match() searches further into the string
+ match is not valid, so pcre2_match() searches further into the string
for occurrences of "a" or "b".
PCRE2_NOTEMPTY_ATSTART
- This is like PCRE2_NOTEMPTY, except that it locks out an empty string
+ This is like PCRE2_NOTEMPTY, except that it locks out an empty string
match only at the first matching position, that is, at the start of the
- subject plus the starting offset. An empty string match later in the
- subject is permitted. If the pattern is anchored, such a match can
+ subject plus the starting offset. An empty string match later in the
+ subject is permitted. If the pattern is anchored, such a match can
occur only if the pattern contains \K.
PCRE2_NO_JIT
- By default, if a pattern has been successfully processed by
- pcre2_jit_compile(), JIT is automatically used when pcre2_match() is
- called with options that JIT supports. Setting PCRE2_NO_JIT disables
+ By default, if a pattern has been successfully processed by
+ pcre2_jit_compile(), JIT is automatically used when pcre2_match() is
+ called with options that JIT supports. Setting PCRE2_NO_JIT disables
the use of JIT; it forces matching to be done by the interpreter.
PCRE2_NO_UTF_CHECK
When PCRE2_UTF is set at compile time, the validity of the subject as a
- UTF string is checked by default when pcre2_match() is subsequently
- called. If a non-zero starting offset is given, the check is applied
- only to that part of the subject that could be inspected during match-
- ing, and there is a check that the starting offset points to the first
- code unit of a character or to the end of the subject. If there are no
- lookbehind assertions in the pattern, the check starts at the starting
- offset. Otherwise, it starts at the length of the longest lookbehind
+ UTF string is checked by default when pcre2_match() is subsequently
+ called. If a non-zero starting offset is given, the check is applied
+ only to that part of the subject that could be inspected during match-
+ ing, and there is a check that the starting offset points to the first
+ code unit of a character or to the end of the subject. If there are no
+ lookbehind assertions in the pattern, the check starts at the starting
+ offset. Otherwise, it starts at the length of the longest lookbehind
before the starting offset, or at the start of the subject if there are
- not that many characters before the starting offset. Note that the
+ not that many characters before the starting offset. Note that the
sequences \b and \B are one-character lookbehinds.
The check is carried out before any other processing takes place, and a
- negative error code is returned if the check fails. There are several
- UTF error codes for each code unit width, corresponding to different
- problems with the code unit sequence. There are discussions about the
- validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the
+ negative error code is returned if the check fails. There are several
+ UTF error codes for each code unit width, corresponding to different
+ problems with the code unit sequence. There are discussions about the
+ validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the
pcre2unicode page.
- If you know that your subject is valid, and you want to skip these
- checks for performance reasons, you can set the PCRE2_NO_UTF_CHECK
- option when calling pcre2_match(). You might want to do this for the
+ If you know that your subject is valid, and you want to skip these
+ checks for performance reasons, you can set the PCRE2_NO_UTF_CHECK
+ option when calling pcre2_match(). You might want to do this for the
second and subsequent calls to pcre2_match() if you are making repeated
calls to find other matches in the same subject string.
- Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
- invalid string as a subject, or an invalid value of startoffset, is
+ Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
+ invalid string as a subject, or an invalid value of startoffset, is
undefined. Your program may crash or loop indefinitely.
PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT
- These options turn on the partial matching feature. A partial match
- occurs if the end of the subject string is reached successfully, but
- there are not enough subject characters to complete the match. If this
- happens when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) is set,
- matching continues by testing any remaining alternatives. Only if no
- complete match can be found is PCRE2_ERROR_PARTIAL returned instead of
- PCRE2_ERROR_NOMATCH. In other words, PCRE2_PARTIAL_SOFT specifies that
- the caller is prepared to handle a partial match, but only if no com-
+ These options turn on the partial matching feature. A partial match
+ occurs if the end of the subject string is reached successfully, but
+ there are not enough subject characters to complete the match. If this
+ happens when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) is set,
+ matching continues by testing any remaining alternatives. Only if no
+ complete match can be found is PCRE2_ERROR_PARTIAL returned instead of
+ PCRE2_ERROR_NOMATCH. In other words, PCRE2_PARTIAL_SOFT specifies that
+ the caller is prepared to handle a partial match, but only if no com-
plete match can be found.
- If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this
- case, if a partial match is found, pcre2_match() immediately returns
- PCRE2_ERROR_PARTIAL, without considering any other alternatives. In
+ If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this
+ case, if a partial match is found, pcre2_match() immediately returns
+ PCRE2_ERROR_PARTIAL, without considering any other alternatives. In
other words, when PCRE2_PARTIAL_HARD is set, a partial match is consid-
ered to be more important than an alternative complete match.
@@ -2661,38 +2594,38 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
NEWLINE HANDLING WHEN MATCHING
- When PCRE2 is built, a default newline convention is set; this is usu-
- ally the standard convention for the operating system. The default can
- be overridden in a compile context by calling pcre2_set_newline(). It
- can also be overridden by starting a pattern string with, for example,
- (*CRLF), as described in the section on newline conventions in the
- pcre2pattern page. During matching, the newline choice affects the be-
- haviour of the dot, circumflex, and dollar metacharacters. It may also
- alter the way the match starting position is advanced after a match
+ When PCRE2 is built, a default newline convention is set; this is usu-
+ ally the standard convention for the operating system. The default can
+ be overridden in a compile context by calling pcre2_set_newline(). It
+ can also be overridden by starting a pattern string with, for example,
+ (*CRLF), as described in the section on newline conventions in the
+ pcre2pattern page. During matching, the newline choice affects the be-
+ haviour of the dot, circumflex, and dollar metacharacters. It may also
+ alter the way the match starting position is advanced after a match
failure for an unanchored pattern.
When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is
- set as the newline convention, and a match attempt for an unanchored
+ set as the newline convention, and a match attempt for an unanchored
pattern fails when the current starting position is at a CRLF sequence,
- and the pattern contains no explicit matches for CR or LF characters,
- the match position is advanced by two characters instead of one, in
+ and the pattern contains no explicit matches for CR or LF characters,
+ the match position is advanced by two characters instead of one, in
other words, to after the CRLF.
The above rule is a compromise that makes the most common cases work as
- expected. For example, if the pattern is .+A (and the PCRE2_DOTALL
+ expected. For example, if the pattern is .+A (and the PCRE2_DOTALL
option is not set), it does not match the string "\r\nA" because, after
- failing at the start, it skips both the CR and the LF before retrying.
- However, the pattern [\r\n]A does match that string, because it con-
+ failing at the start, it skips both the CR and the LF before retrying.
+ However, the pattern [\r\n]A does match that string, because it con-
tains an explicit CR or LF reference, and so advances only by one char-
acter after the first failure.
An explicit match for CR or LF is either a literal appearance of one of
- those characters in the pattern, or one of the \r or \n or equivalent
+ those characters in the pattern, or one of the \r or \n or equivalent
octal or hexadecimal escape sequences. Implicit matches such as [^X] do
- not count, nor does \s, even though it includes CR and LF in the char-
+ not count, nor does \s, even though it includes CR and LF in the char-
acters that it matches.
- Notwithstanding the above, anomalous effects may still occur when CRLF
+ Notwithstanding the above, anomalous effects may still occur when CRLF
is a valid newline sequence and explicit \r or \n escapes appear in the
pattern.
@@ -2703,82 +2636,82 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
- In general, a pattern matches a certain portion of the subject, and in
- addition, further substrings from the subject may be picked out by
- parenthesized parts of the pattern. Following the usage in Jeffrey
- Friedl's book, this is called "capturing" in what follows, and the
- phrase "capture group" (Perl terminology) is used for a fragment of a
- pattern that picks out a substring. PCRE2 supports several other kinds
- of parenthesized group that do not cause substrings to be captured. The
- pcre2_pattern_info() function can be used to find out how many capture
- groups there are in a compiled pattern.
-
- You can use auxiliary functions for accessing captured substrings by
+ In general, a pattern matches a certain portion of the subject, and in
+ addition, further substrings from the subject may be picked out by
+ parenthesized parts of the pattern. Following the usage in Jeffrey
+ Friedl's book, this is called "capturing" in what follows, and the
+ phrase "capturing subpattern" or "capturing group" is used for a frag-
+ ment of a pattern that picks out a substring. PCRE2 supports several
+ other kinds of parenthesized subpattern that do not cause substrings to
+ be captured. The pcre2_pattern_info() function can be used to find out
+ how many capturing subpatterns there are in a compiled pattern.
+
+ You can use auxiliary functions for accessing captured substrings by
number or by name, as described in sections below.
Alternatively, you can make direct use of the vector of PCRE2_SIZE val-
- ues, called the ovector, which contains the offsets of captured
- strings. It is part of the match data block. The function
- pcre2_get_ovector_pointer() returns the address of the ovector, and
+ ues, called the ovector, which contains the offsets of captured
+ strings. It is part of the match data block. The function
+ pcre2_get_ovector_pointer() returns the address of the ovector, and
pcre2_get_ovector_count() returns the number of pairs of values it con-
tains.
Within the ovector, the first in each pair of values is set to the off-
set of the first code unit of a substring, and the second is set to the
- offset of the first code unit after the end of a substring. These val-
- ues are always code unit offsets, not character offsets. That is, they
- are byte offsets in the 8-bit library, 16-bit offsets in the 16-bit
+ offset of the first code unit after the end of a substring. These val-
+ ues are always code unit offsets, not character offsets. That is, they
+ are byte offsets in the 8-bit library, 16-bit offsets in the 16-bit
library, and 32-bit offsets in the 32-bit library.
- After a partial match (error return PCRE2_ERROR_PARTIAL), only the
- first pair of offsets (that is, ovector[0] and ovector[1]) are set.
- They identify the part of the subject that was partially matched. See
+ After a partial match (error return PCRE2_ERROR_PARTIAL), only the
+ first pair of offsets (that is, ovector[0] and ovector[1]) are set.
+ They identify the part of the subject that was partially matched. See
the pcre2partial documentation for details of partial matching.
- After a fully successful match, the first pair of offsets identifies
- the portion of the subject string that was matched by the entire pat-
- tern. The next pair is used for the first captured substring, and so
- on. The value returned by pcre2_match() is one more than the highest
- numbered pair that has been set. For example, if two substrings have
- been captured, the returned value is 3. If there are no captured sub-
+ After a fully successful match, the first pair of offsets identifies
+ the portion of the subject string that was matched by the entire pat-
+ tern. The next pair is used for the first captured substring, and so
+ on. The value returned by pcre2_match() is one more than the highest
+ numbered pair that has been set. For example, if two substrings have
+ been captured, the returned value is 3. If there are no captured sub-
strings, the return value from a successful match is 1, indicating that
just the first pair of offsets has been set.
- If a pattern uses the \K escape sequence within a positive assertion,
+ If a pattern uses the \K escape sequence within a positive assertion,
the reported start of a successful match can be greater than the end of
- the match. For example, if the pattern (?=ab\K) is matched against
+ the match. For example, if the pattern (?=ab\K) is matched against
"ab", the start and end offset values for the match are 2 and 0.
- If a capture group is matched repeatedly within a single match opera-
- tion, it is the last portion of the subject that it matched that is
- returned.
+ If a capturing subpattern is matched repeatedly within a single
+ match operation, it is the last portion of the subject that it matched
+ that is returned.
If the ovector is too small to hold all the captured substring offsets,
- as much as possible is filled in, and the function returns a value of
- zero. If captured substrings are not of interest, pcre2_match() may be
+ as much as possible is filled in, and the function returns a value of
+ zero. If captured substrings are not of interest, pcre2_match() may be
called with a match data block whose ovector is of minimum length (that
is, one pair).
- It is possible for capture group number n+1 to match some part of the
- subject when group n has not been used at all. For example, if the
- string "abc" is matched against the pattern (a|(z))(bc) the return from
- the function is 4, and groups 1 and 3 are matched, but 2 is not. When
- this happens, both values in the offset pairs corresponding to unused
- groups are set to PCRE2_UNSET.
-
- Offset values that correspond to unused groups at the end of the
- expression are also set to PCRE2_UNSET. For example, if the string
- "abc" is matched against the pattern (abc)(x(yz)?)? groups 2 and 3 are
- not matched. The return from the function is 2, because the highest
- used capture group number is 1. The offsets for the second and
- third capture groups (assuming the vector is large enough, of course)
- are set to PCRE2_UNSET.
+ It is possible for capturing subpattern number n+1 to match some part
+ of the subject when subpattern n has not been used at all. For example,
+ if the string "abc" is matched against the pattern (a|(z))(bc) the
+ return from the function is 4, and subpatterns 1 and 3 are matched, but
+ 2 is not. When this happens, both values in the offset pairs corre-
+ sponding to unused subpatterns are set to PCRE2_UNSET.
+
+ Offset values that correspond to unused subpatterns at the end of the
+ expression are also set to PCRE2_UNSET. For example, if the string
+ "abc" is matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3
+ are not matched. The return from the function is 2, because the high-
+ est used capturing subpattern number is 1. The offsets for the sec-
+ ond and third capturing subpatterns (assuming the vector is large
+ enough, of course) are set to PCRE2_UNSET.
Elements in the ovector that do not correspond to capturing parentheses
in the pattern are never changed. That is, if a pattern contains n cap-
turing parentheses, no more than ovector[0] to ovector[2n+1] are set by
- pcre2_match(). The other elements retain whatever values they previ-
- ously had. After a failed match attempt, the contents of the ovector
+ pcre2_match(). The other elements retain whatever values they previ-
+ ously had. After a failed match attempt, the contents of the ovector
are unchanged.
@@ -2788,69 +2721,69 @@ OTHER INFORMATION ABOUT A MATCH
PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);
- As well as the offsets in the ovector, other information about a match
- is retained in the match data block and can be retrieved by the above
- functions in appropriate circumstances. If they are called at other
+ As well as the offsets in the ovector, other information about a match
+ is retained in the match data block and can be retrieved by the above
+ functions in appropriate circumstances. If they are called at other
times, the result is undefined.
- After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a
- failure to match (PCRE2_ERROR_NOMATCH), a mark name may be available.
- The function pcre2_get_mark() can be called to access this name, which
- can be specified in the pattern by any of the backtracking control
- verbs, not just (*MARK). The same function applies to all the verbs. It
+ After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a
+ failure to match (PCRE2_ERROR_NOMATCH), a (*MARK), (*PRUNE), or (*THEN)
+ name may be available. The function pcre2_get_mark() can be called to
+ access this name. The same function applies to all three verbs. It
returns a pointer to the zero-terminated name, which is within the com-
piled pattern. If no name is available, NULL is returned. The length of
the name (excluding the terminating zero) is stored in the code unit
that precedes the name. You should use this length instead of relying
on the terminating zero if the name might contain a binary zero.
- After a successful match, the name that is returned is the last mark
- name encountered on the matching path through the pattern. Instances of
- backtracking verbs without names do not count. Thus, for example, if
- the matching path contains (*MARK:A)(*PRUNE), the name "A" is returned.
- After a "no match" or a partial match, the last encountered name is
- returned. For example, consider this pattern:
+ After a successful match, the name that is returned is the last
+ (*MARK), (*PRUNE), or (*THEN) name encountered on the matching path
+ through the pattern. Instances of (*PRUNE) and (*THEN) without names
+ are ignored. Thus, for example, if the matching path contains
+ (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a
+ partial match, the last encountered name is returned. For example,
+ consider this pattern:
^(*MARK:A)((*MARK:B)a|b)c
- When it matches "bc", the returned name is A. The B mark is "seen" in
- the first branch of the group, but it is not on the matching path. On
- the other hand, when this pattern fails to match "bx", the returned
+ When it matches "bc", the returned name is A. The B mark is "seen" in
+ the first branch of the group, but it is not on the matching path. On
+ the other hand, when this pattern fails to match "bx", the returned
name is B.
- Warning: By default, certain start-of-match optimizations are used to
- give a fast "no match" result in some situations. For example, if the
- anchoring is removed from the pattern above, there is an initial check
- for the presence of "c" in the subject before running the matching
+ Warning: By default, certain start-of-match optimizations are used to
+ give a fast "no match" result in some situations. For example, if the
+ anchoring is removed from the pattern above, there is an initial check
+ for the presence of "c" in the subject before running the matching
engine. This check fails for "bx", causing a match failure without see-
ing any marks. You can disable the start-of-match optimizations by set-
- ting the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or by
- starting the pattern with (*NO_START_OPT).
+ ting the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or starting
+ the pattern with (*NO_START_OPT).
- After a successful match, a partial match, or one of the invalid UTF
- errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can
+ After a successful match, a partial match, or one of the invalid UTF
+ errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can
be called. After a successful or partial match it returns the code unit
- offset of the character at which the match started. For a non-partial
- match, this can be different to the value of ovector[0] if the pattern
- contains the \K escape sequence. After a partial match, however, this
- value is always the same as ovector[0] because \K does not affect the
+ offset of the character at which the match started. For a non-partial
+ match, this can be different to the value of ovector[0] if the pattern
+ contains the \K escape sequence. After a partial match, however, this
+ value is always the same as ovector[0] because \K does not affect the
result of a partial match.
- After a UTF check failure, pcre2_get_startchar() can be used to obtain
+ After a UTF check failure, pcre2_get_startchar() can be used to obtain
the code unit offset of the invalid UTF character. Details are given in
the pcre2unicode page.
ERROR RETURNS FROM pcre2_match()
- If pcre2_match() fails, it returns a negative number. This can be con-
- verted to a text string by calling the pcre2_get_error_message() func-
- tion (see "Obtaining a textual error message" below). Negative error
- codes are also returned by other functions, and are documented with
- them. The codes are given names in the header file. If UTF checking is
+ If pcre2_match() fails, it returns a negative number. This can be con-
+ verted to a text string by calling the pcre2_get_error_message() func-
+ tion (see "Obtaining a textual error message" below). Negative error
+ codes are also returned by other functions, and are documented with
+ them. The codes are given names in the header file. If UTF checking is
in force and an invalid UTF subject string is detected, one of a number
- of UTF-specific negative error codes is returned. Details are given in
- the pcre2unicode page. The following are the other errors that may be
+ of UTF-specific negative error codes is returned. Details are given in
+ the pcre2unicode page. The following are the other errors that may be
returned by pcre2_match():
PCRE2_ERROR_NOMATCH
@@ -2859,20 +2792,20 @@ ERROR RETURNS FROM pcre2_match()
PCRE2_ERROR_PARTIAL
- The subject string did not match, but it did match partially. See the
+ The subject string did not match, but it did match partially. See the
pcre2partial documentation for details of partial matching.
PCRE2_ERROR_BADMAGIC
PCRE2 stores a 4-byte "magic number" at the start of the compiled code,
- to catch the case when it is passed a junk pointer. This is the error
+ to catch the case when it is passed a junk pointer. This is the error
that is returned when the magic number is not present.
PCRE2_ERROR_BADMODE
- This error is given when a compiled pattern is passed to a function in
- a library of a different code unit width, for example, a pattern com-
- piled by the 8-bit library is passed to a 16-bit or 32-bit library
+ This error is given when a compiled pattern is passed to a function in
+ a library of a different code unit width, for example, a pattern com-
+ piled by the 8-bit library is passed to a 16-bit or 32-bit library
function.
PCRE2_ERROR_BADOFFSET
@@ -2886,15 +2819,15 @@ ERROR RETURNS FROM pcre2_match()
PCRE2_ERROR_BADUTFOFFSET
The UTF code unit sequence that was passed as a subject was checked and
- found to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the
- value of startoffset did not point to the beginning of a UTF character
+ found to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the
+ value of startoffset did not point to the beginning of a UTF character
or the end of the subject.
PCRE2_ERROR_CALLOUT
- This error is never generated by pcre2_match() itself. It is provided
- for use by callout functions that want to cause pcre2_match() or
- pcre2_callout_enumerate() to return a distinctive error code. See the
+ This error is never generated by pcre2_match() itself. It is provided
+ for use by callout functions that want to cause pcre2_match() or
+ pcre2_callout_enumerate() to return a distinctive error code. See the
pcre2callout documentation for details.
PCRE2_ERROR_DEPTHLIMIT
@@ -2907,14 +2840,14 @@ ERROR RETURNS FROM pcre2_match()
PCRE2_ERROR_INTERNAL
- An unexpected internal error has occurred. This error could be caused
+ An unexpected internal error has occurred. This error could be caused
by a bug in PCRE2 or by overwriting of the compiled pattern.
PCRE2_ERROR_JIT_STACKLIMIT
- This error is returned when a pattern that was successfully studied
- using JIT is being matched, but the memory available for the just-in-
- time processing stack is not large enough. See the pcre2jit documenta-
+ This error is returned when a pattern that was successfully studied
+ using JIT is being matched, but the memory available for the just-in-
+ time processing stack is not large enough. See the pcre2jit documenta-
tion for more details.
PCRE2_ERROR_MATCHLIMIT
@@ -2923,12 +2856,11 @@ ERROR RETURNS FROM pcre2_match()
PCRE2_ERROR_NOMEMORY
- If a pattern contains many nested backtracking points, heap memory is
- used to remember them. This error is given when the memory allocation
- function (default or custom) fails. Note that a different error,
- PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
- the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
- PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+ If a pattern contains many nested backtracking points, heap memory is
+ used to remember them. This error is given when the memory allocation
+ function (default or custom) fails. Note that a different error,
+ PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+ the heap limit.
PCRE2_ERROR_NULL
@@ -2938,11 +2870,11 @@ ERROR RETURNS FROM pcre2_match()
This error is returned when pcre2_match() detects a recursion loop
within the pattern. Specifically, it means that either the whole pat-
- tern or a capture group has been called recursively for the second time
- at the same position in the subject string. Some simple patterns that
+ tern or a subpattern has been called recursively for the second time at
+ the same position in the subject string. Some simple patterns that
might do this are detected and faulted at compile time, but more com-
plicated cases, in particular mutual recursions between two different
- groups, cannot be detected until matching is attempted.
+ subpatterns, cannot be detected until matching is attempted.
OBTAINING A TEXTUAL ERROR MESSAGE
@@ -3015,7 +2947,7 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
copies it into new memory, obtained using the same memory allocation
function that was used for the match data block. The first two argu-
ments of these functions are a pointer to the match data block and a
- capture group number.
+ capturing group number.
The final arguments of pcre2_substring_copy_bynumber() are a pointer to
the buffer and a pointer to a variable that contains its length in code
@@ -3085,11 +3017,12 @@ EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
should be freed by calling pcre2_substring_list_free().
If this function encounters a substring that is unset, which can happen
- when capture group number n+1 matches some part of the subject, but
- group n has not been used at all, it returns an empty string. This can
- be distinguished from a genuine zero-length substring by inspecting the
- appropriate offset in the ovector, which contains PCRE2_UNSET for unset
- substrings, or by calling pcre2_substring_length_bynumber().
+ when capturing subpattern number n+1 matches some part of the subject,
+ but subpattern n has not been used at all, it returns an empty string.
+ This can be distinguished from a genuine zero-length substring by
+ inspecting the appropriate offset in the ovector, which contains
+ PCRE2_UNSET for unset substrings, or by calling pcre2_sub-
+ string_length_bynumber().
EXTRACTING CAPTURED SUBSTRINGS BY NAME
@@ -3108,27 +3041,26 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
void pcre2_substring_free(PCRE2_UCHAR *buffer);
- To extract a substring by name, you first have to find the associated num-
+ To extract a substring by name, you first have to find the associated num-
ber. For example, for this pattern:
(a+)b(?<xxx>\d+)...
- the number of the capture group called "xxx" is 2. If the name is known
- to be unique (PCRE2_DUPNAMES was not set), you can find the number from
+ the number of the subpattern called "xxx" is 2. If the name is known to
+ be unique (PCRE2_DUPNAMES was not set), you can find the number from
the name by calling pcre2_substring_number_from_name(). The first argu-
- ment is the compiled pattern, and the second is the name. The yield of
- the function is the group number, PCRE2_ERROR_NOSUBSTRING if there is
- no group with that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is
- more than one group with that name. Given the number, you can extract
- the substring directly from the ovector, or use one of the "bynumber"
- functions described above.
-
- For convenience, there are also "byname" functions that correspond to
- the "bynumber" functions, the only difference being that the second
- argument is a name instead of a number. If PCRE2_DUPNAMES is set and
+ ment is the compiled pattern, and the second is the name. The yield of
+ the function is the subpattern number, PCRE2_ERROR_NOSUBSTRING if there
+ is no subpattern of that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if
+ there is more than one subpattern of that name. Given the number, you
+ can extract the substring directly from the ovector, or use one of the
+ "bynumber" functions described above.
+
+ For convenience, there are also "byname" functions that correspond to
+ the "bynumber" functions, the only difference being that the second
+ argument is a name instead of a number. If PCRE2_DUPNAMES is set and
there are duplicate names, these functions scan all the groups with the
- given name, and return the captured substring from the first named
- group that is set.
+ given name, and return the first named string that is set.
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
returned. If all groups with the name have numbers that are greater
@@ -3136,12 +3068,12 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
returned. If there is at least one group with a slot in the ovector,
but no group is found to be set, PCRE2_ERROR_UNSET is returned.
- Warning: If the pattern uses the (?| feature to set up multiple capture
- groups with the same number, as described in the section on duplicate
- group numbers in the pcre2pattern page, you cannot use names to distin-
- guish the different capture groups, because names are not included in
- the compiled code. The matching process uses only numbers. For this
- reason, the use of different names for groups with the same number
+ Warning: If the pattern uses the (?| feature to set up multiple subpat-
+ terns with the same number, as described in the section on duplicate
+ subpattern numbers in the pcre2pattern page, you cannot use names to
+ distinguish the different subpatterns, because names are not included
+ in the compiled code. The matching process uses only numbers. For this
+ reason, the use of different names for subpatterns of the same number
causes an error at compile time.
@@ -3151,22 +3083,18 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
PCRE2_SIZE length, PCRE2_SIZE startoffset,
uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, PCRE2_SPTR replacement,
- PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
+ PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
PCRE2_SIZE *outlengthptr);
This function calls pcre2_match() and then makes a copy of the subject
- string in outputbuffer, replacing one or more parts that were matched
- with the replacement string, whose length is supplied in rlength. This
- can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
- The default is to perform just one replacement, but there is an option
- that requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below
- for details).
-
- Matches in which a \K item in a lookahead in the pattern causes the
- match to end before it starts are not supported, and give rise to an
- error return. For global replacements, matches in which \K in a lookbe-
- hind causes the match to start earlier than the point that was reached
- in the previous iteration are also not supported.
+ string in outputbuffer, replacing the part that was matched with the
+ replacement string, whose length is supplied in rlength. This can be
+ given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
+ which a \K item in a lookahead in the pattern causes the match to end
+ before it starts are not supported, and give rise to an error return.
+ For global replacements, matches in which \K in a lookbehind causes the
+ match to start earlier than the point that was reached in the previous
+ iteration are also not supported.
The first seven arguments of pcre2_substitute() are the same as for
pcre2_match(), except that the partial matching options are not permit-
@@ -3176,9 +3104,9 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
were used to allocate memory for the compiled code.
If an external match_data block is provided, its contents afterwards
- are those set by the final call to pcre2_match(). For global changes,
- this will have ended in a matching error. The contents of the ovector
- within the match data block may or may not have been changed.
+ are those set by the final call to pcre2_match(), which will have ended
+ in a matching error. The contents of the ovector within the match data
+ block may or may not have been changed.
The outlengthptr argument must point to a variable that contains the
length, in code units, of the output buffer. If the function is suc-
@@ -3200,13 +3128,13 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
In the replacement string, which is interpreted as a UTF string in UTF
mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
option is set, a dollar character is an escape character that can spec-
- ify the insertion of characters from capture groups or names from
- (*MARK) or other control verbs in the pattern. The following forms are
+ ify the insertion of characters from capturing groups or (*MARK),
+ (*PRUNE), or (*THEN) items in the pattern. The following forms are
always recognized:
$$ insert a dollar character
$<n> or ${<n>} insert the contents of group <n>
- $*MARK or ${*MARK} insert a control verb name
+ $*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
Either a group number or a group name can be given for <n>. Curly
brackets are required only if the following character would be inter-
@@ -3215,30 +3143,31 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
matched with "=abc=" and the replacement string "+$1$0$1+", the result
is "=+babcb+=".
- $*MARK inserts the name from the last encountered backtracking control
- verb on the matching path that has a name. (*MARK) must always include
- a name, but the other verbs need not. For example, in the case of
- (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B)
- the relevant name is "B". This facility can be used to perform simple
- simultaneous substitutions, as this pcre2test example shows:
+ $*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or
+ (*THEN) on the matching path that has a name. (*MARK) must always
+ include a name, but (*PRUNE) and (*THEN) need not. For example, in the
+ case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
+ (*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be
+ used to perform simple simultaneous substitutions, as this pcre2test
+ example shows:
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
apple lemon
2: pear orange
- As well as the usual options for pcre2_match(), a number of additional
+ As well as the usual options for pcre2_match(), a number of additional
options can be set in the options argument of pcre2_substitute().
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
- string, replacing every matching substring. If this option is not set,
- only the first matching substring is replaced. The search for matches
- takes place in the original subject string (that is, previous replace-
- ments do not affect it). Iteration is implemented by advancing the
- startoffset value for each search, which is always passed the entire
+ string, replacing every matching substring. If this option is not set,
+ only the first matching substring is replaced. The search for matches
+ takes place in the original subject string (that is, previous replace-
+ ments do not affect it). Iteration is implemented by advancing the
+ startoffset value for each search, which is always passed the entire
subject string. If an offset limit is set in the match context, search-
ing stops when that limit is reached.
- You can restrict the effect of a global substitution to a portion of
+ You can restrict the effect of a global substitution to a portion of
the subject string by setting either or both of startoffset and an off-
set limit. Here is a pcre2test example:
@@ -3246,35 +3175,36 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
ABC ABC ABC ABC\=offset=3,offset_limit=12
2: ABC A!C A!C ABC
- When continuing with global substitutions after matching a substring
+ When continuing with global substitutions after matching a substring
with zero length, an attempt to find a non-empty match at the same off-
set is performed. If this is not successful, the offset is advanced by
one character except when CRLF is a valid newline sequence and the next
- two characters are CR, LF. In this case, the offset is advanced by two
+ two characters are CR, LF. In this case, the offset is advanced by two
characters.
- PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
buffer is too small. The default action is to return PCRE2_ERROR_NOMEM-
- ORY immediately. If this option is set, however, pcre2_substitute()
+ ORY immediately. If this option is set, however, pcre2_substitute()
continues to go through the motions of matching and substituting (with-
- out, of course, writing anything) in order to compute the size of buf-
- fer that is needed. This value is passed back via the outlengthptr
- variable, with the result of the function still being
+ out, of course, writing anything) in order to compute the size of buf-
+ fer that is needed. This value is passed back via the outlengthptr
+ variable, with the result of the function still being
PCRE2_ERROR_NOMEMORY.
- Passing a buffer size of zero is a permitted way of finding out how
- much memory is needed for a given substitution. However, this does mean
+ Passing a buffer size of zero is a permitted way of finding out how
+ much memory is needed for a given substitution. However, this does mean
that the entire operation is carried out twice. Depending on the appli-
- cation, it may be more efficient to allocate a large buffer and free
- the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
+ cation, it may be more efficient to allocate a large buffer and free
+ the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
FLOW_LENGTH.
- PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that
- do not appear in the pattern to be treated as unset groups. This option
- should be used with care, because it means that a typo in a group name
- or number no longer causes the PCRE2_ERROR_NOSUBSTRING error.
+ PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups
+ that do not appear in the pattern to be treated as unset groups. This
+ option should be used with care, because it means that a typo in a
+ group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING
+ error.
- PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including
+ PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including
unknown groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be
treated as empty strings when inserted as described above. If this
option is not set, an attempt to insert an unset group causes the
@@ -3300,18 +3230,16 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
no case forcing. The sequences \u and \l force the next character (if
it is a letter) to upper or lower case, respectively, and then the
state automatically reverts to no case forcing. Case forcing applies to
- all inserted characters, including those from capture groups and let-
+ all inserted characters, including those from captured groups and let-
ters within \Q...\E quoted sequences.
Note that case forcing sequences such as \U...\E do not nest. For exam-
ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
- \E has no effect. Note also that the PCRE2_ALT_BSUX and
- PCRE2_EXTRA_ALT_BSUX options do not apply to replacement
- strings.
+ \E has no effect.
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
- flexibility to capture group substitution. The syntax is similar to
- that used by Bash:
+ flexibility to group substitution. The syntax is similar to that used
+ by Bash:
${<n>:-<string>}
${<n>:+<string1>:<string2>}
@@ -3340,8 +3268,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause
unknown groups in the extended syntax forms to be treated as unset.
- If successful, pcre2_substitute() returns the number of successful
- matches. This may be zero if no matches were found, and is never
+ If successful, pcre2_substitute() returns the number of replacements
+ that were made. This may be zero if no matches were found, and is never
greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
In the event of an error, a negative error code is returned. Except for
@@ -3374,117 +3302,62 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
obtained by calling the pcre2_get_error_message() function (see
"Obtaining a textual error message" above).
- Substitution callouts
- int pcre2_set_substitute_callout(pcre2_match_context *mcontext,
- int (*callout_function)(pcre2_substitute_callout_block *, void *),
- void *callout_data);
-
- The pcre2_set_substitute_callout() function can be used to specify a
- callout function for pcre2_substitute(). This information is passed in
- a match context. The callout function is called after each substitution
- has been processed, but it can cause the replacement not to happen. The
- callout function is not called for simulated substitutions that happen
- as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
-
- The first argument of the callout function is a pointer to a substitute
- callout block structure, which contains the following fields, not nec-
- essarily in this order:
-
- uint32_t version;
- uint32_t subscount;
- PCRE2_SPTR input;
- PCRE2_SPTR output;
- PCRE2_SIZE *ovector;
- uint32_t oveccount;
- PCRE2_SIZE output_offsets[2];
-
- The version field contains the version number of the block format. The
- current version is 0. The version number will increase in future if
- more fields are added, but the intention is never to remove any of the
- existing fields.
-
- The subscount field is the number of the current match. It is 1 for the
- first callout, 2 for the second, and so on. The input and output point-
- ers are copies of the values passed to pcre2_substitute().
-
- The ovector field points to the ovector, which contains the result of
- the most recent match. The oveccount field contains the number of pairs
- that are set in the ovector, and is always greater than zero.
-
- The output_offsets vector contains the offsets of the replacement in
- the output string. This has already been processed for dollar and (if
- requested) backslash substitutions as described above.
-
- The second argument of the callout function is the value passed as
- callout_data when the function was registered. The value returned by
- the callout function is interpreted as follows:
-
- If the value is zero, the replacement is accepted, and, if PCRE2_SUB-
- STITUTE_GLOBAL is set, processing continues with a search for the next
- match. If the value is not zero, the current replacement is not
- accepted. If the value is greater than zero, processing continues when
- PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero
- or PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is
- copied to the output and the call to pcre2_substitute() exits, return-
- ing the number of matches so far.
-
-
-DUPLICATE CAPTURE GROUP NAMES
+DUPLICATE SUBPATTERN NAMES
int pcre2_substring_nametable_scan(const pcre2_code *code,
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
- When a pattern is compiled with the PCRE2_DUPNAMES option, names for
- capture groups are not required to be unique. Duplicate names are
- always allowed for groups with the same number, created by using the
- (?| feature. Indeed, if such groups are named, they are required to use
- the same names.
+ When a pattern is compiled with the PCRE2_DUPNAMES option, names for
+ subpatterns are not required to be unique. Duplicate names are always
+ allowed for subpatterns with the same number, created by using the (?|
+ feature. Indeed, if such subpatterns are named, they are required to
+ use the same names.
- Normally, patterns that use duplicate names are such that in any one
- match, only one of each set of identically-named groups participates.
- An example is shown in the pcre2pattern documentation.
+ Normally, patterns with duplicate names are such that in any one match,
+ only one of the named subpatterns participates. An example is shown in
+ the pcre2pattern documentation.
- When duplicates are present, pcre2_substring_copy_byname() and
- pcre2_substring_get_byname() return the first substring corresponding
- to the given name that is set. Only if none are set is
- PCRE2_ERROR_UNSET returned. The pcre2_substring_number_from_name()
+ When duplicates are present, pcre2_substring_copy_byname() and
+ pcre2_substring_get_byname() return the first substring corresponding
+ to the given name that is set. Only if none are set is
+ PCRE2_ERROR_UNSET returned. The pcre2_substring_number_from_name()
function returns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are
duplicate names.
- If you want to get full details of all captured substrings for a given
- name, you must use the pcre2_substring_nametable_scan() function. The
- first argument is the compiled pattern, and the second is the name. If
- the third and fourth arguments are NULL, the function returns a group
+ If you want to get full details of all captured substrings for a given
+ name, you must use the pcre2_substring_nametable_scan() function. The
+ first argument is the compiled pattern, and the second is the name. If
+ the third and fourth arguments are NULL, the function returns a group
number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise.
When the third and fourth arguments are not NULL, they must be pointers
- to variables that are updated by the function. After it has run, they
+ to variables that are updated by the function. After it has run, they
point to the first and last entries in the name-to-number table for the
- given name, and the function returns the length of each entry in code
- units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
+ given name, and the function returns the length of each entry in code
+ units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
no entries for the given name.
The format of the name table is described above in the section entitled
- Information about a pattern. Given all the relevant entries for the
- name, you can extract each of their numbers, and hence the captured
+ Information about a pattern. Given all the relevant entries for the
+ name, you can extract each of their numbers, and hence the captured
data.
FINDING ALL POSSIBLE MATCHES AT ONE POSITION
- The traditional matching function uses a similar algorithm to Perl,
- which stops when it finds the first match at a given point in the sub-
+ The traditional matching function uses a similar algorithm to Perl,
+ which stops when it finds the first match at a given point in the sub-
ject. If you want to find all possible matches, or the longest possible
- match at a given position, consider using the alternative matching
- function (see below) instead. If you cannot use the alternative func-
+ match at a given position, consider using the alternative matching
+ function (see below) instead. If you cannot use the alternative func-
tion, you can kludge it up by making use of the callout facility, which
is described in the pcre2callout documentation.
What you have to do is to insert a callout right at the end of the pat-
- tern. When your callout function is called, extract and save the cur-
- rent matched substring. Then return 1, which forces pcre2_match() to
- backtrack and try other alternatives. Ultimately, when it runs out of
+ tern. When your callout function is called, extract and save the cur-
+ rent matched substring. Then return 1, which forces pcre2_match() to
+ backtrack and try other alternatives. Ultimately, when it runs out of
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
@@ -3496,26 +3369,26 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
pcre2_match_context *mcontext,
int *workspace, PCRE2_SIZE wscount);
- The function pcre2_dfa_match() is called to match a subject string
- against a compiled pattern, using a matching algorithm that scans the
+ The function pcre2_dfa_match() is called to match a subject string
+ against a compiled pattern, using a matching algorithm that scans the
subject string just once (not counting lookaround assertions), and does
- not backtrack. This has different characteristics to the normal algo-
- rithm, and is not compatible with Perl. Some of the features of PCRE2
- patterns are not supported. Nevertheless, there are times when this
- kind of matching can be useful. For a discussion of the two matching
+ not backtrack. This has different characteristics to the normal algo-
+ rithm, and is not compatible with Perl. Some of the features of PCRE2
+ patterns are not supported. Nevertheless, there are times when this
+ kind of matching can be useful. For a discussion of the two matching
algorithms, and a list of features that pcre2_dfa_match() does not sup-
port, see the pcre2matching documentation.
- The arguments for the pcre2_dfa_match() function are the same as for
+ The arguments for the pcre2_dfa_match() function are the same as for
pcre2_match(), plus two extras. The ovector within the match data block
is used in a different way, and this is described below. The other com-
- mon arguments are used in the same way as for pcre2_match(), so their
+ mon arguments are used in the same way as for pcre2_match(), so their
description is not repeated here.
- The two additional arguments provide workspace for the function. The
- workspace vector should contain at least 20 elements. It is used for
+ The two additional arguments provide workspace for the function. The
+ workspace vector should contain at least 20 elements. It is used for
keeping track of multiple paths through the pattern tree. More
- workspace is needed for patterns and subjects where there are a lot of
+ workspace is needed for patterns and subjects where there are a lot of
potential matches.
Here is an example of a simple call to pcre2_dfa_match():
@@ -3535,14 +3408,13 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
Option bits for pcre2_dfa_match()
- The unused bits of the options argument for pcre2_dfa_match() must be
- zero. The only bits that may be set are PCRE2_ANCHORED,
- PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL,
- PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
- PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
- PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
- these are exactly the same as for pcre2_match(), so their description
- is not repeated here.
+ The unused bits of the options argument for pcre2_dfa_match() must be
+ zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDAN-
+ CHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
+ PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD,
+ PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but
+ the last four of these are exactly the same as for pcre2_match(), so
+ their description is not repeated here.
PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT
@@ -3602,8 +3474,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
which is the number of matched substrings. The offsets of the sub-
strings are returned in the ovector, and can be extracted by number in
the same way as for pcre2_match(), but the numbers bear no relation to
- any capture groups that may exist in the pattern, because DFA matching
- does not support capturing.
+ any capturing groups that may exist in the pattern, because DFA match-
+ ing does not support group capture.
Calls to the convenience functions that extract substrings by name
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used
@@ -3640,7 +3512,7 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
This return is given if pcre2_dfa_match() encounters a condition item
that uses a backreference for the condition, or a test for recursion in
- a specific capture group. These are not supported.
+ a specific group. These are not supported.
PCRE2_ERROR_DFA_WSSIZE
@@ -3649,23 +3521,22 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
PCRE2_ERROR_DFA_RECURSE
- When a recursion or subroutine call is processed, the matching function
- calls itself recursively, using private memory for the ovector and
- workspace. This error is given if the internal ovector is not large
- enough. This should be extremely rare, as a vector of size 1000 is
- used.
+ When a recursive subpattern is processed, the matching function calls
+ itself recursively, using private memory for the ovector and workspace.
+ This error is given if the internal ovector is not large enough. This
+ should be extremely rare, as a vector of size 1000 is used.
PCRE2_ERROR_DFA_BADRESTART
- When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
- some plausibility checks are made on the contents of the workspace,
- which should contain data about the previous partial match. If any of
+ When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
+ some plausibility checks are made on the contents of the workspace,
+ which should contain data about the previous partial match. If any of
these checks fail, this error is given.
SEE ALSO
- pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3),
+ pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3),
pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3).
@@ -3678,8 +3549,8 @@ AUTHOR
REVISION
- Last updated: 14 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 07 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -4027,48 +3898,45 @@ USING EBCDIC CODE
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
- By default pcre2grep supports the use of callouts with string arguments
- within the patterns it is matching. There are two kinds: one that gen-
- erates output using local code, and another that calls an external pro-
- gram or script. If --disable-pcre2grep-callout-fork is added to the
- configure command, only the first kind of callout is supported; if
- --disable-pcre2grep-callout is used, all callouts are completely
- ignored. For more details of pcre2grep callouts, see the pcre2grep doc-
- umentation.
+ By default, on non-Windows systems, pcre2grep supports the use of call-
+ outs with string arguments within the patterns it is matching, in order
+ to run external scripts. For details, see the pcre2grep documentation.
+ This support can be disabled by adding --disable-pcre2grep-callout to
+ the configure command.
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
- By default, pcre2grep reads all files as plain text. You can build it
- so that it recognizes files whose names end in .gz or .bz2, and reads
+ By default, pcre2grep reads all files as plain text. You can build it
+ so that it recognizes files whose names end in .gz or .bz2, and reads
them with libz or libbz2, respectively, by adding one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
to the configure command. These options naturally require that the rel-
- evant libraries are installed on your system. Configuration will fail
+ evant libraries are installed on your system. Configuration will fail
if they are not.
PCRE2GREP BUFFER SIZE
- pcre2grep uses an internal buffer to hold a "window" on the file it is
+ pcre2grep uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when
it finds a match. The default starting size of the buffer is 20KiB. The
- buffer itself is three times this size, but because of the way it is
+ buffer itself is three times this size, but because of the way it is
used for holding "before" lines, the longest line that is guaranteed to
be processable is the notional buffer size. If a longer line is encoun-
- tered, pcre2grep automatically expands the buffer, up to a specified
- maximum size, whose default is 1MiB or the starting size, whichever is
- the larger. You can change the default parameter values by adding, for
+ tered, pcre2grep automatically expands the buffer, up to a specified
+ maximum size, whose default is 1MiB or the starting size, whichever is
+ the larger. You can change the default parameter values by adding, for
example,
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
- to the configure command. The caller of pcre2grep can override these
- values by using --buffer-size and --max-buffer-size on the command
+ to the configure command. The caller of pcre2grep can override these
+ values by using --buffer-size and --max-buffer-size on the command
line.
@@ -4079,26 +3947,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
--enable-pcre2test-libreadline
--enable-pcre2test-libedit
- to the configure command, pcre2test is linked with the libreadline
+ to the configure command, pcre2test is linked with the libreadline
or libedit library, respectively, and when its input is from a terminal,
- it reads it using the readline() function. This provides line-editing
- and history facilities. Note that libreadline is GPL-licensed, so if
- you distribute a binary of pcre2test linked in this way, there may be
+ it reads it using the readline() function. This provides line-editing
+ and history facilities. Note that libreadline is GPL-licensed, so if
+ you distribute a binary of pcre2test linked in this way, there may be
licensing issues. These can be avoided by linking instead with libedit,
which has a BSD licence.
- Setting --enable-pcre2test-libreadline causes the -lreadline option to
- be added to the pcre2test build. In many operating environments with a
- system-installed readline library this is sufficient. However, in some
+ Setting --enable-pcre2test-libreadline causes the -lreadline option to
+ be added to the pcre2test build. In many operating environments with a
+ system-installed readline library this is sufficient. However, in some
environments (e.g. if an unmodified distribution version of readline is
- in use), some extra configuration may be necessary. The INSTALL file
+ in use), some extra configuration may be necessary. The INSTALL file
for libreadline says this:
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library."
- If your environment has not been set up so that an appropriate library
+ If your environment has not been set up so that an appropriate library
is automatically included, you may need to add something like
LIBS="-lncurses"
@@ -4112,7 +3980,7 @@ INCLUDING DEBUGGING CODE
--enable-debug
- to the configure command, additional debugging code is included in the
+ to the configure command, additional debugging code is included in the
build. This feature is intended for use by the PCRE2 maintainers.
@@ -4122,15 +3990,15 @@ DEBUGGING WITH VALGRIND SUPPORT
--enable-valgrind
- to the configure command, PCRE2 will use valgrind annotations to mark
- certain memory regions as unaddressable. This allows it to detect
- invalid memory accesses, and is mostly useful for debugging PCRE2
+ to the configure command, PCRE2 will use valgrind annotations to mark
+ certain memory regions as unaddressable. This allows it to detect
+ invalid memory accesses, and is mostly useful for debugging PCRE2
itself.
CODE COVERAGE REPORTING
- If your C compiler is gcc, you can build a version of PCRE2 that can
+ If your C compiler is gcc, you can build a version of PCRE2 that can
generate a code coverage report for its test suite. To enable this, you
must install lcov version 1.6 or above. Then specify
@@ -4139,20 +4007,20 @@ CODE COVERAGE REPORTING
to the configure command and build PCRE2 in the usual way.
Note that using ccache (a caching C compiler) is incompatible with code
- coverage reporting. If you have configured ccache to run automatically
+ coverage reporting. If you have configured ccache to run automatically
on your system, you must set the environment variable
CCACHE_DISABLE=1
before running make to build PCRE2, so that ccache is not used.
- When --enable-coverage is used, the following additional targets are
+ When --enable-coverage is used, the following additional targets are
added to the Makefile:
make coverage
- This creates a fresh coverage report for the PCRE2 test suite. It is
- equivalent to running "make coverage-reset", "make coverage-baseline",
+ This creates a fresh coverage report for the PCRE2 test suite. It is
+ equivalent to running "make coverage-reset", "make coverage-baseline",
"make check", and then "make coverage-report".
make coverage-reset
@@ -4169,71 +4037,56 @@ CODE COVERAGE REPORTING
make coverage-clean-report
- This removes the generated coverage report without cleaning the cover-
+ This removes the generated coverage report without cleaning the cover-
age data itself.
make coverage-clean-data
- This removes the captured coverage data without removing the coverage
+ This removes the captured coverage data without removing the coverage
files created at compile time (*.gcno).
make coverage-clean
- This cleans all coverage data including the generated coverage report.
- For more information about code coverage, see the gcov and lcov docu-
+ This cleans all coverage data including the generated coverage report.
+ For more information about code coverage, see the gcov and lcov docu-
mentation.
-DISABLING THE Z AND T FORMATTING MODIFIERS
-
- The C99 standard defines formatting modifiers z and t for size_t and
- ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
- in environments other than Microsoft Visual Studio when __STDC_VER-
- SION__ is defined and has a value greater than or equal to 199901L
- (indicating C99). However, there is at least one environment that
- claims to be C99 but does not support these modifiers. If
-
- --disable-percent-zt
-
- is specified, no use is made of the z or t modifiers. Instead of %td or
- %zu, %lu is used, with a cast for size_t values.
-
-
SUPPORT FOR FUZZERS
- There is a special option for use by people who want to run fuzzing
+ There is a special option for use by people who want to run fuzzing
tests on PCRE2:
--enable-fuzz-support
At present this applies only to the 8-bit library. If set, it causes an
- extra library called libpcre2-fuzzsupport.a to be built, but not
- installed. This contains a single function called LLVMFuzzerTestOneIn-
- put() whose arguments are a pointer to a string and the length of the
- string. When called, this function tries to compile the string as a
- pattern, and if that succeeds, to match it. This is done both with no
- options and with some random options bits that are generated from the
+ extra library called libpcre2-fuzzsupport.a to be built, but not
+ installed. This contains a single function called LLVMFuzzerTestOneIn-
+ put() whose arguments are a pointer to a string and the length of the
+ string. When called, this function tries to compile the string as a
+ pattern, and if that succeeds, to match it. This is done both with no
+ options and with some random options bits that are generated from the
string.
- Setting --enable-fuzz-support also causes a binary called pcre2fuz-
- zcheck to be created. This is normally run under valgrind or used when
+ Setting --enable-fuzz-support also causes a binary called pcre2fuz-
+ zcheck to be created. This is normally run under valgrind or used when
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
- function and outputs information about what it is doing. The input
- strings are specified by arguments: if an argument starts with "=" the
- rest of it is a literal input string. Otherwise, it is assumed to be a
+ function and outputs information about what it is doing. The input
+ strings are specified by arguments: if an argument starts with "=" the
+ rest of it is a literal input string. Otherwise, it is assumed to be a
file name, and the contents of the file are the test string.
OBSOLETE OPTION
- In versions of PCRE2 prior to 10.30, there were two ways of handling
- backtracking in the pcre2_match() function. The default was to use the
+ In versions of PCRE2 prior to 10.30, there were two ways of handling
+ backtracking in the pcre2_match() function. The default was to use the
system stack, but if
--disable-stack-for-recursion
- was set, memory on the heap was used. From release 10.30 onwards this
- has changed (the stack is no longer used) and this option now does
+ was set, memory on the heap was used. From release 10.30 onwards this
+ has changed (the stack is no longer used) and this option now does
nothing except give a warning.
@@ -4251,8 +4104,8 @@ AUTHOR
REVISION
- Last updated: 03 March 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 26 April 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -4282,26 +4135,21 @@ DESCRIPTION
its entry point in a match context (see pcre2_set_callout() in the
pcre2api documentation).
- When using the pcre2_substitute() function, an additional callout fea-
- ture is available. This does a callout after each change to the subject
- string and is described in the pcre2api documentation; the rest of this
- document is concerned with callouts during pattern matching.
-
- Within a regular expression, (?C<arg>) indicates a point at which the
- external function is to be called. Different callout points can be
- identified by putting a number less than 256 after the letter C. The
- default value is zero. Alternatively, the argument may be a delimited
- string. The starting delimiter must be one of ` ' " ^ % # $ { and the
+ Within a regular expression, (?C<arg>) indicates a point at which the
+ external function is to be called. Different callout points can be
+ identified by putting a number less than 256 after the letter C. The
+ default value is zero. Alternatively, the argument may be a delimited
+ string. The starting delimiter must be one of ` ' " ^ % # $ { and the
ending delimiter is the same as the start, except for {, where the end-
- ing delimiter is }. If the ending delimiter is needed within the
- string, it must be doubled. For example, this pattern has two callout
+ ing delimiter is }. If the ending delimiter is needed within the
+ string, it must be doubled. For example, this pattern has two callout
points:
(?C1)abc(?C"some ""arbitrary"" text")def
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled,
- PCRE2 automatically inserts callouts, all with number 255, before each
- item in the pattern except for immediately before or after an explicit
+ PCRE2 automatically inserts callouts, all with number 255, before each
+ item in the pattern except for immediately before or after an explicit
callout. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
A(?C3)B
@@ -4318,36 +4166,36 @@ DESCRIPTION
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
- Notice that there is a callout before and after each parenthesis and
+ Notice that there is a callout before and after each parenthesis and
alternation bar. If the pattern contains a conditional group whose con-
- dition is an assertion, an automatic callout is inserted immediately
- before the condition. Such a callout may also be inserted explicitly,
+ dition is an assertion, an automatic callout is inserted immediately
+ before the condition. Such a callout may also be inserted explicitly,
for example:
(?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de)
- This applies only to assertion conditions (because they are themselves
+ This applies only to assertion conditions (because they are themselves
independent groups).
- Callouts can be useful for tracking the progress of pattern matching.
+ Callouts can be useful for tracking the progress of pattern matching.
The pcre2test program has a pattern qualifier (/auto_callout) that sets
- automatic callouts. When any callouts are present, the output from
- pcre2test indicates how the pattern is being matched. This is useful
- information when you are trying to optimize the performance of a par-
+ automatic callouts. When any callouts are present, the output from
+ pcre2test indicates how the pattern is being matched. This is useful
+ information when you are trying to optimize the performance of a par-
ticular pattern.
MISSING CALLOUTS
- You should be aware that, because of optimizations in the way PCRE2
+ You should be aware that, because of optimizations in the way PCRE2
compiles and matches patterns, callouts sometimes do not happen exactly
as you might expect.
Auto-possessification
At compile time, PCRE2 "auto-possessifies" repeated items when it knows
- that what follows cannot be part of the repeat. For example, a+[bc] is
- compiled as if it were a++[bc]. The pcre2test output when this pattern
+ that what follows cannot be part of the repeat. For example, a+[bc] is
+ compiled as if it were a++[bc]. The pcre2test output when this pattern
is compiled with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied
to the string "aaaa" is:
@@ -4356,11 +4204,11 @@ MISSING CALLOUTS
+2 ^ ^ [bc]
No match
- This indicates that when matching [bc] fails, there is no backtracking
+ This indicates that when matching [bc] fails, there is no backtracking
into a+ (because it is being treated as a++) and therefore the callouts
- that would be taken for the backtracks do not occur. You can disable
- the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
- pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In
+ that would be taken for the backtracks do not occur. You can disable
+ the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
+ pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In
this case, the output changes to this:
--->aaaa
@@ -4377,19 +4225,19 @@ MISSING CALLOUTS
Automatic .* anchoring
By default, an optimization is applied when .* is the first significant
- item in a pattern. If PCRE2_DOTALL is set, so that the dot can match
- any character, the pattern is automatically anchored. If PCRE2_DOTALL
- is not set, a match can start only after an internal newline or at the
+ item in a pattern. If PCRE2_DOTALL is set, so that the dot can match
+ any character, the pattern is automatically anchored. If PCRE2_DOTALL
+ is not set, a match can start only after an internal newline or at the
beginning of the subject, and pcre2_compile() remembers this. If a pat-
- tern has more than one top-level branch, automatic anchoring occurs if
+ tern has more than one top-level branch, automatic anchoring occurs if
all branches are anchorable.
- This optimization is disabled, however, if .* is in an atomic group or
- if there is a backreference to the capture group in which it appears.
- It is also disabled if the pattern contains (*PRUNE) or (*SKIP). How-
+ This optimization is disabled, however, if .* is in an atomic group or
+ if there is a backreference to the capturing group in which it appears.
+ It is also disabled if the pattern contains (*PRUNE) or (*SKIP). How-
ever, the presence of callouts does not affect it.
- For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT
+ For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT
and applied to the string "aa", the pcre2test output is:
--->aa
@@ -4399,10 +4247,10 @@ MISSING CALLOUTS
+2 ^ \d
No match
- This shows that all match attempts start at the beginning of the sub-
- ject. In other words, the pattern is anchored. You can disable this
- optimization by passing PCRE2_NO_DOTSTAR_ANCHOR to pcre2_compile(), or
- starting the pattern with (*NO_DOTSTAR_ANCHOR). In this case, the out-
+ This shows that all match attempts start at the beginning of the sub-
+ ject. In other words, the pattern is anchored. You can disable this
+ optimization by passing PCRE2_NO_DOTSTAR_ANCHOR to pcre2_compile(), or
+ starting the pattern with (*NO_DOTSTAR_ANCHOR). In this case, the out-
put changes to:
--->aa
@@ -4415,42 +4263,42 @@ MISSING CALLOUTS
+2 ^ \d
No match
- This shows more match attempts, starting at the second subject charac-
- ter. Another optimization, described in the next section, means that
+ This shows more match attempts, starting at the second subject charac-
+ ter. Another optimization, described in the next section, means that
there is no subsequent attempt to match with an empty subject.
Other optimizations
- Other optimizations that provide fast "no match" results also affect
+ Other optimizations that provide fast "no match" results also affect
callouts. For example, if the pattern is
ab(?C4)cd
- PCRE2 knows that any matching string must contain the letter "d". If
- the subject string is "abyz", the lack of "d" means that matching
- doesn't ever start, and the callout is never reached. However, with
+ PCRE2 knows that any matching string must contain the letter "d". If
+ the subject string is "abyz", the lack of "d" means that matching
+ doesn't ever start, and the callout is never reached. However, with
"abyd", though the result is still no match, the callout is obeyed.
- For most patterns PCRE2 also knows the minimum length of a matching
- string, and will immediately give a "no match" return without actually
- running a match if the subject is not long enough, or, for unanchored
+ For most patterns PCRE2 also knows the minimum length of a matching
+ string, and will immediately give a "no match" return without actually
+ running a match if the subject is not long enough, or, for unanchored
patterns, if it has been scanned far enough.
You can disable these optimizations by passing the PCRE2_NO_START_OPTI-
- MIZE option to pcre2_compile(), or by starting the pattern with
- (*NO_START_OPT). This slows down the matching process, but does ensure
+ MIZE option to pcre2_compile(), or by starting the pattern with
+ (*NO_START_OPT). This slows down the matching process, but does ensure
that callouts such as the example above are obeyed.
THE CALLOUT INTERFACE
- During matching, when PCRE2 reaches a callout point, if an external
- function is provided in the match context, it is called. This applies
- to normal, DFA, and JIT matching. The first argument to the call-
+ During matching, when PCRE2 reaches a callout point, if an external
+ function is provided in the match context, it is called. This applies
+ to normal, DFA, and JIT matching. The first argument to the call-
out function is a pointer to a pcre2_callout block. The second argument
- is the void * callout data that was supplied when the callout was set
+ is the void * callout data that was supplied when the callout was set
up by calling pcre2_set_callout() (see the pcre2api documentation). The
- callout block structure contains the following fields, not necessarily
+ callout block structure contains the following fields, not necessarily
in this order:
uint32_t version;
@@ -4470,118 +4318,118 @@ THE CALLOUT INTERFACE
PCRE2_SIZE callout_string_length;
PCRE2_SPTR callout_string;
- The version field contains the version number of the block format. The
- current version is 2; the three callout string fields were added for
- version 1, and the callout_flags field for version 2. If you are writ-
- ing an application that might use an earlier release of PCRE2, you
- should check the version number before accessing any of these fields.
- The version number will increase in future if more fields are added,
+ The version field contains the version number of the block format. The
+ current version is 2; the three callout string fields were added for
+ version 1, and the callout_flags field for version 2. If you are writ-
+ ing an application that might use an earlier release of PCRE2, you
+ should check the version number before accessing any of these fields.
+ The version number will increase in future if more fields are added,
but the intention is never to remove any of the existing fields.
Fields for numerical callouts
- For a numerical callout, callout_string is NULL, and callout_number
- contains the number of the callout, in the range 0-255. This is the
- number that follows (?C for callouts that are part of the pattern; it is
+ For a numerical callout, callout_string is NULL, and callout_number
+ contains the number of the callout, in the range 0-255. This is the
+ number that follows (?C for callouts that are part of the pattern; it is
255 for automatically generated callouts.
Fields for string callouts
- For callouts with string arguments, callout_number is always zero, and
- callout_string points to the string that is contained within the com-
+ For callouts with string arguments, callout_number is always zero, and
+ callout_string points to the string that is contained within the com-
piled pattern. Its length is given by callout_string_length. Duplicated
ending delimiters that were present in the original pattern string have
been turned into single characters, but there is no other processing of
- the callout string argument. An additional code unit containing binary
- zero is present after the string, but is not included in the length.
- The delimiter that was used to start the string is also stored within
- the pattern, immediately before the string itself. You can access this
+ the callout string argument. An additional code unit containing binary
+ zero is present after the string, but is not included in the length.
+ The delimiter that was used to start the string is also stored within
+ the pattern, immediately before the string itself. You can access this
delimiter as callout_string[-1] if you need it.
The callout_string_offset field is the code unit offset to the start of
the callout argument string within the original pattern string. This is
- provided for the benefit of applications such as script languages that
+ provided for the benefit of applications such as script languages that
might need to report errors in the callout string within the pattern.
Fields for all callouts
- The remaining fields in the callout block are the same for both kinds
+ The remaining fields in the callout block are the same for both kinds
of callout.
- The offset_vector field is a pointer to a vector of capturing offsets
+ The offset_vector field is a pointer to a vector of capturing offsets
(the "ovector"). You may read the elements in this vector, but you must
not change any of them.
- For calls to pcre2_match(), the offset_vector field is not (since
- release 10.30) a pointer to the actual ovector that was passed to the
- matching function in the match data block. Instead it points to an
- internal ovector of a size large enough to hold all possible captured
+ For calls to pcre2_match(), the offset_vector field is not (since
+ release 10.30) a pointer to the actual ovector that was passed to the
+ matching function in the match data block. Instead it points to an
+ internal ovector of a size large enough to hold all possible captured
substrings in the pattern. Note that whenever a recursion or subroutine
- call within a pattern completes, the capturing state is reset to what
+ call within a pattern completes, the capturing state is reset to what
it was before.
- The capture_last field contains the number of the most recently cap-
- tured substring, and the capture_top field contains one more than the
- number of the highest numbered captured substring so far. If no sub-
- strings have yet been captured, the value of capture_last is 0 and the
- value of capture_top is 1. The values of these fields do not always
- differ by one; for example, when the callout in the pattern
+ The capture_last field contains the number of the most recently cap-
+ tured substring, and the capture_top field contains one more than the
+ number of the highest numbered captured substring so far. If no sub-
+ strings have yet been captured, the value of capture_last is 0 and the
+ value of capture_top is 1. The values of these fields do not always
+ differ by one; for example, when the callout in the pattern
((a)(b))(?C2) is taken, capture_last is 1 but capture_top is 4.
- The contents of ovector[2] to ovector[<capture_top>*2-1] can be
+ The contents of ovector[2] to ovector[<capture_top>*2-1] can be
inspected in order to extract substrings that have been matched so far,
- in the same way as extracting substrings after a match has completed.
- The values in ovector[0] and ovector[1] are always PCRE2_UNSET because
- the match is by definition not complete. Substrings that have not been
- captured but whose numbers are less than capture_top also have both of
+ in the same way as extracting substrings after a match has completed.
+ The values in ovector[0] and ovector[1] are always PCRE2_UNSET because
+ the match is by definition not complete. Substrings that have not been
+ captured but whose numbers are less than capture_top also have both of
their ovector slots set to PCRE2_UNSET.
- For DFA matching, the offset_vector field points to the ovector that
- was passed to the matching function in the match data block for call-
+ For DFA matching, the offset_vector field points to the ovector that
+ was passed to the matching function in the match data block for call-
outs at the top level, but to an internal ovector during the processing
- of pattern recursions, lookarounds, and atomic groups. However, these
- ovectors hold no useful information because pcre2_dfa_match() does not
- support substring capturing. The value of capture_top is always 1 and
+ of pattern recursions, lookarounds, and atomic groups. However, these
+ ovectors hold no useful information because pcre2_dfa_match() does not
+ support substring capturing. The value of capture_top is always 1 and
the value of capture_last is always 0 for DFA matching.
The subject and subject_length fields contain copies of the values that
were passed to the matching function.
- The start_match field normally contains the offset within the subject
- at which the current match attempt started. However, if the escape
- sequence \K has been encountered, this value is changed to reflect the
- modified starting point. If the pattern is not anchored, the callout
+ The start_match field normally contains the offset within the subject
+ at which the current match attempt started. However, if the escape
+ sequence \K has been encountered, this value is changed to reflect the
+ modified starting point. If the pattern is not anchored, the callout
function may be called several times from the same point in the pattern
for different starting points in the subject.
- The current_position field contains the offset within the subject of
+ The current_position field contains the offset within the subject of
the current match pointer.
The pattern_position field contains the offset in the pattern string to
the next item to be matched.
- The next_item_length field contains the length of the next item to be
- processed in the pattern string. When the callout is at the end of the
- pattern, the length is zero. When the callout precedes an opening
+ The next_item_length field contains the length of the next item to be
+ processed in the pattern string. When the callout is at the end of the
+ pattern, the length is zero. When the callout precedes an opening
parenthesis, the length includes meta characters that follow the paren-
- thesis. For example, in a callout before an assertion such as (?=ab)
- the length is 3. For an alternation bar or a closing parenthesis,
- the length is one, unless a closing parenthesis is followed by a quan-
+ thesis. For example, in a callout before an assertion such as (?=ab)
+ the length is 3. For an alternation bar or a closing parenthesis,
+ the length is one, unless a closing parenthesis is followed by a quan-
tifier, in which case its length is included. (This changed in release
- 10.23. In earlier releases, before an opening parenthesis the length
- was that of the entire group, and before an alternation bar or a clos-
- ing parenthesis the length was zero.)
+ 10.23. In earlier releases, before an opening parenthesis the length
+ was that of the entire subpattern, and before an alternation bar or a
+ closing parenthesis the length was zero.)
- The pattern_position and next_item_length fields are intended to help
- in distinguishing between different automatic callouts, which all have
- the same callout number. However, they are set for all callouts, and
+ The pattern_position and next_item_length fields are intended to help
+ in distinguishing between different automatic callouts, which all have
+ the same callout number. However, they are set for all callouts, and
are used by pcre2test to show the next item to be matched when display-
ing callout information.
In callouts from pcre2_match() the mark field contains a pointer to the
- zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
- (*THEN) item in the match, or NULL if no such items have been passed.
- Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
+ zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
+ (*THEN) item in the match, or NULL if no such items have been passed.
+ Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
previous (*MARK). In callouts from the DFA matching function this field
always contains NULL.
@@ -4591,25 +4439,25 @@ THE CALLOUT INTERFACE
PCRE2_CALLOUT_STARTMATCH
- This is set for the first callout after the start of matching for each
+ This is set for the first callout after the start of matching for each
new starting position in the subject.
PCRE2_CALLOUT_BACKTRACK
- This is set if there has been a matching backtrack since the previous
- callout, or since the start of matching if this is the first callout
+ This is set if there has been a matching backtrack since the previous
+ callout, or since the start of matching if this is the first callout
from a pcre2_match() run.
- Both bits are set when a backtrack has caused a "bumpalong" to a new
- starting position in the subject. Output from pcre2test does not indi-
- cate the presence of these bits unless the callout_extra modifier is
+ Both bits are set when a backtrack has caused a "bumpalong" to a new
+ starting position in the subject. Output from pcre2test does not indi-
+ cate the presence of these bits unless the callout_extra modifier is
set.
The information in the callout_flags field is provided so that applica-
- tions can track and tell their users how matching with backtracking is
- done. This can be useful when trying to optimize patterns, or just to
- understand how PCRE2 works. There is no support in pcre2_dfa_match()
- because there is no backtracking in DFA matching, and there is no sup-
+ tions can track and tell their users how matching with backtracking is
+ done. This can be useful when trying to optimize patterns, or just to
+ understand how PCRE2 works. There is no support in pcre2_dfa_match()
+ because there is no backtracking in DFA matching, and there is no sup-
port in JIT because JIT is all about maximizing matching performance.
In both these cases the callout_flags field is always zero.
@@ -4617,16 +4465,16 @@ THE CALLOUT INTERFACE
RETURN VALUES FROM CALLOUTS
The external callout function returns an integer to PCRE2. If the value
- is zero, matching proceeds as normal. If the value is greater than
- zero, matching fails at the current point, but the testing of other
+ is zero, matching proceeds as normal. If the value is greater than
+ zero, matching fails at the current point, but the testing of other
matching possibilities goes ahead, just as if a lookahead assertion had
failed. If the value is less than zero, the match is abandoned, and the
matching function returns the negative value.
- Negative values should normally be chosen from the set of
- PCRE2_ERROR_xxx values. In particular, PCRE2_ERROR_NOMATCH forces a
- standard "no match" failure. The error number PCRE2_ERROR_CALLOUT is
- reserved for use by callout functions; it will never be used by PCRE2
+ Negative values should normally be chosen from the set of
+ PCRE2_ERROR_xxx values. In particular, PCRE2_ERROR_NOMATCH forces a
+ standard "no match" failure. The error number PCRE2_ERROR_CALLOUT is
+ reserved for use by callout functions; it will never be used by PCRE2
itself.
@@ -4637,14 +4485,14 @@ CALLOUT ENUMERATION
void *user_data);
A script language that supports the use of string arguments in callouts
- might like to scan all the callouts in a pattern before running the
+ might like to scan all the callouts in a pattern before running the
match. This can be done by calling pcre2_callout_enumerate(). The first
- argument is a pointer to a compiled pattern, the second points to a
- callback function, and the third is arbitrary user data. The callback
- function is called for every callout in the pattern in the order in
+ argument is a pointer to a compiled pattern, the second points to a
+ callback function, and the third is arbitrary user data. The callback
+ function is called for every callout in the pattern in the order in
which they appear. Its first argument is a pointer to a callout enumer-
- ation block, and its second argument is the user_data value that was
- passed to pcre2_callout_enumerate(). The data block contains the fol-
+ ation block, and its second argument is the user_data value that was
+ passed to pcre2_callout_enumerate(). The data block contains the fol-
lowing fields:
version Block version number
@@ -4655,17 +4503,17 @@ CALLOUT ENUMERATION
callout_string_length Length of callout string
callout_string Points to callout string or is NULL
- The version number is currently 0. It will increase if new fields are
- ever added to the block. The remaining fields are the same as their
- namesakes in the pcre2_callout block that is used for callouts during
+ The version number is currently 0. It will increase if new fields are
+ ever added to the block. The remaining fields are the same as their
+ namesakes in the pcre2_callout block that is used for callouts during
matching, as described above.
- Note that the value of pattern_position is unique for each callout.
- However, if a callout occurs inside a group that is quantified with a
+ Note that the value of pattern_position is unique for each callout.
+ However, if a callout occurs inside a group that is quantified with a
non-zero minimum or a fixed maximum, the group is replicated inside the
- compiled pattern. For example, a pattern such as /(a){2}/ is compiled
- as if it were /(a)(a)/. This means that the callout will be enumerated
- more than once, but with the same value for pattern_position in each
+ compiled pattern. For example, a pattern such as /(a){2}/ is compiled
+ as if it were /(a)(a)/. This means that the callout will be enumerated
+ more than once, but with the same value for pattern_position in each
case.
The callback function should normally return zero. If it returns a non-
@@ -4682,8 +4530,8 @@ AUTHOR
REVISION
- Last updated: 03 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 26 April 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -4712,10 +4560,10 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
repeat quantifiers on other assertions, for example, \b* (but not
\b{3}), but these do not seem to have any use.
- 3. Capture groups that occur inside negative lookaround assertions are
- counted, but their entries in the offsets vector are set only when a
- negative assertion is a condition that has a matching branch (that is,
- the condition is false).
+ 3. Capturing subpatterns that occur inside negative lookaround asser-
+ tions are counted, but their entries in the offsets vector are set only
+ when a negative assertion is a condition that has a matching branch
+ (that is, the condition is false).
4. The following Perl escape sequences are not supported: \F, \l, \L,
\u, \U, and \N when followed by a character name. \N on its own, match-
@@ -4723,28 +4571,27 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
point, are supported. The escapes that modify the case of following
letters are implemented by Perl's general string-handling and are not
part of its pattern matching engine. If any of these are encountered by
- PCRE2, an error is generated by default. However, if either of the
- PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are
- interpreted as ECMAScript interprets them.
+ PCRE2, an error is generated by default. However, if the PCRE2_ALT_BSUX
+ option is set, \U and \u are interpreted as ECMAScript interprets them.
5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2
is built with Unicode support (the default). The properties that can be
- tested with \p and \P are limited to the general category properties
- such as Lu and Nd, script names such as Greek or Han, and the derived
+ tested with \p and \P are limited to the general category properties
+ such as Lu and Nd, script names such as Greek or Han, and the derived
properties Any and L&. PCRE2 does support the Cs (surrogate) property,
- which Perl does not; the Perl documentation says "Because Perl hides
+ which Perl does not; the Perl documentation says "Because Perl hides
the need for the user to understand the internal representation of Uni-
- code characters, there is no need to implement the somewhat messy con-
+ code characters, there is no need to implement the somewhat messy con-
cept of surrogates."
6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
in between are treated as literals. However, this is slightly different
- from Perl in that $ and @ are also handled as literals inside the
+ from Perl in that $ and @ are also handled as literals inside the
quotes. In Perl, they cause variable interpolation (but of course PCRE2
- does not have variables). Also, Perl does "double-quotish backslash
+ does not have variables). Also, Perl does "double-quotish backslash
interpolation" on any backslashes between \Q and \E which, its documen-
- tation says, "may lead to confusing results". PCRE2 treats a backslash
- between \Q and \E just like any other character. Note the following
+ tation says, "may lead to confusing results". PCRE2 treats a backslash
+ between \Q and \E just like any other character. Note the following
examples:
Pattern PCRE2 matches Perl matches
@@ -4756,54 +4603,55 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
\QA\B\E A\B A\B
\Q\\E \ \\E
- The \Q...\E sequence is recognized both inside and outside character
+ The \Q...\E sequence is recognized both inside and outside character
classes.
- 7. Fairly obviously, PCRE2 does not support the (?{code}) and
+ 7. Fairly obviously, PCRE2 does not support the (?{code}) and
(??{code}) constructions. However, PCRE2 does have a "callout" feature,
which allows an external function to be called during pattern matching.
See the pcre2callout documentation for details.
- 8. Subroutine calls (whether recursive or not) were treated as atomic
- groups up to PCRE2 release 10.23, but from release 10.30 this changed,
+ 8. Subroutine calls (whether recursive or not) were treated as atomic
+ groups up to PCRE2 release 10.23, but from release 10.30 this changed,
and backtracking into subroutine calls is now supported, as in Perl.
- 9. If any of the backtracking control verbs are used in a group that is
- called as a subroutine (whether or not recursively), their effect is
- confined to that group; it does not extend to the surrounding pattern.
- This is not always the case in Perl. In particular, if (*THEN) is
- present in a group that is called as a subroutine, its action is lim-
- ited to that group, even if the group does not contain any | charac-
- ters. Note that such groups are processed as anchored at the point
- where they are tested.
-
- 10. If a pattern contains more than one backtracking control verb, the
- first one that is backtracked onto acts. For example, in the pattern
- A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure
+ 9. If any of the backtracking control verbs are used in a subpattern
+ that is called as a subroutine (whether or not recursively), their
+ effect is confined to that subpattern; it does not extend to the sur-
+ rounding pattern. This is not always the case in Perl. In particular,
+ if (*THEN) is present in a group that is called as a subroutine, its
+ action is limited to that group, even if the group does not contain any
+ | characters. Note that such subpatterns are processed as anchored at
+ the point where they are tested.
+
+ 10. If a pattern contains more than one backtracking control verb, the
+ first one that is backtracked onto acts. For example, in the pattern
+ A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure
in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases
it is the same as PCRE2, but there are cases where it differs.
- 11. Most backtracking verbs in assertions have their normal actions.
+ 11. Most backtracking verbs in assertions have their normal actions.
They are not confined to the assertion.
- 12. There are some differences that are concerned with the settings of
- captured strings when part of a pattern is repeated. For example,
- matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2
+ 12. There are some differences that are concerned with the settings of
+ captured strings when part of a pattern is repeated. For example,
+ matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2
unset, but in PCRE2 it is set to "b".
- 13. PCRE2's handling of duplicate capture group numbers and names is
- not as general as Perl's. This is a consequence of the fact that PCRE2
- works internally just with numbers, using an external table to trans-
- late between numbers and names. In particular, a pattern such as
- (?|(?<a>A)|(?<b>B)), where the two capture groups have the same number
- but different names, is not supported, and causes an error at compile
- time. If it were allowed, it would not be possible to distinguish which
- group matched, because both names map to capture group number 1. To
- avoid this confusing situation, an error is given at compile time.
+ 13. PCRE2's handling of duplicate subpattern numbers and duplicate sub-
+ pattern names is not as general as Perl's. This is a consequence of the
+ fact that PCRE2 works internally just with numbers, using an external
+ table to translate between numbers and names. In particular, a pattern
+ such as (?|(?<a>A)|(?<b>B)), where the two capturing parentheses have
+ the same number but different names, is not supported, and causes an
+ error at compile time. If it were allowed, it would not be possible to
+ distinguish which parentheses matched, because both names map to cap-
+ turing subpattern number 1. To avoid this confusing situation, an error
+ is given at compile time.
14. Perl used to recognize comments in some places that PCRE2 does not,
- for example, between the ( and ? at the start of a group. If the /x
- modifier is set, Perl allowed white space between ( and ? though the
+ for example, between the ( and ? at the start of a subpattern. If the
+ /x modifier is set, Perl allowed white space between ( and ? though the
latest Perls give an error (for a while it was just deprecated). There
may still be some cases where Perl behaves differently.
@@ -4887,8 +4735,8 @@ AUTHOR
REVISION
- Last updated: 12 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 28 July 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -5010,56 +4858,32 @@ SIMPLE USE OF JIT
to handle the pattern.
-MATCHING SUBJECTS CONTAINING INVALID UTF
-
- When a pattern is compiled with the PCRE2_UTF option, the interpretive
- matching function expects its subject string to be a valid sequence of
- UTF code units. If it is not, the result is undefined. This is also
- true by default of matching via JIT. However, if the option
- PCRE2_JIT_INVALID_UTF is passed to pcre2_jit_compile(), code that can
- process a subject containing invalid UTF is compiled.
-
- In this mode, an invalid code unit sequence never matches any pattern
- item. It does not match dot, it does not match \p{Any}, it does not
- even match negative items such as [^X]. A lookbehind assertion fails if
- it encounters an invalid sequence while moving the current point back-
- wards. In other words, an invalid UTF code unit sequence acts as a bar-
- rier which no match can cross. Reaching an invalid sequence causes an
- immediate backtrack.
-
- Using this option, an application can run matches in arbitrary data,
- knowing that any matched strings that are returned will be valid UTF.
- This can be useful when searching for text in executable or other
- binary files.
-
-
UNSUPPORTED OPTIONS AND PATTERN ITEMS
The pcre2_match() options that are supported for JIT matching are
- PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
- PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and
- PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options
- are not supported at match time.
+ PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+ PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
+ PCRE2_ANCHORED option is not supported at match time.
- If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the
+ If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the
use of JIT, forcing matching by the interpreter code.
- The only unsupported pattern items are \C (match a single data unit)
- when running in a UTF mode, and a callout immediately before an asser-
+ The only unsupported pattern items are \C (match a single data unit)
+ when running in a UTF mode, and a callout immediately before an asser-
tion condition in a conditional group.
RETURN VALUES FROM JIT MATCHING
When a pattern is matched using JIT matching, the return values are the
- same as those given by the interpretive pcre2_match() code, with the
- addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means
- that the memory used for the JIT stack was insufficient. See "Control-
+ same as those given by the interpretive pcre2_match() code, with the
+ addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means
+ that the memory used for the JIT stack was insufficient. See "Control-
ling the JIT stack" below for a discussion of JIT stack usage.
- The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if
- searching a very large pattern tree goes on for too long, as it is in
- the same circumstance when JIT is not used, but the details of exactly
+ The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if
+ searching a very large pattern tree goes on for too long, as it is in
+ the same circumstance when JIT is not used, but the details of exactly
what is counted are not the same. The PCRE2_ERROR_DEPTHLIMIT error code
is never returned when JIT matching is used.
@@ -5067,25 +4891,25 @@ RETURN VALUES FROM JIT MATCHING
CONTROLLING THE JIT STACK
When the compiled JIT code runs, it needs a block of memory to use as a
- stack. By default, it uses 32KiB on the machine stack. However, some
- large or complicated patterns need more than this. The error
- PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
- Three functions are provided for managing blocks of memory for use as
- JIT stacks. There is further discussion about the use of JIT stacks in
+ stack. By default, it uses 32KiB on the machine stack. However, some
+ large or complicated patterns need more than this. The error
+ PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
+ Three functions are provided for managing blocks of memory for use as
+ JIT stacks. There is further discussion about the use of JIT stacks in
the section entitled "JIT stack FAQ" below.
- The pcre2_jit_stack_create() function creates a JIT stack. Its argu-
- ments are a starting size, a maximum size, and a general context (for
- memory allocation functions, or NULL for standard memory allocation).
+ The pcre2_jit_stack_create() function creates a JIT stack. Its argu-
+ ments are a starting size, a maximum size, and a general context (for
+ memory allocation functions, or NULL for standard memory allocation).
It returns a pointer to an opaque structure of type pcre2_jit_stack, or
- NULL if there is an error. The pcre2_jit_stack_free() function is used
+ NULL if there is an error. The pcre2_jit_stack_free() function is used
to free a stack that is no longer needed. If its argument is NULL, this
- function returns immediately, without doing anything. (For the techni-
- cally minded: the address space is allocated by mmap or VirtualAlloc.)
- A maximum stack size of 512KiB to 1MiB should be more than enough for
+ function returns immediately, without doing anything. (For the techni-
+ cally minded: the address space is allocated by mmap or VirtualAlloc.)
+ A maximum stack size of 512KiB to 1MiB should be more than enough for
any pattern.
- The pcre2_jit_stack_assign() function specifies which stack JIT code
+ The pcre2_jit_stack_assign() function specifies which stack JIT code
should use. Its arguments are as follows:
pcre2_match_context *mcontext
@@ -5095,7 +4919,7 @@ CONTROLLING THE JIT STACK
The first argument is a pointer to a match context. When this is subse-
quently passed to a matching function, its information determines which
JIT stack is used. If this argument is NULL, the function returns imme-
- diately, without doing anything. There are three cases for the values
+ diately, without doing anything. There are three cases for the values
of the other two options:
(1) If callback is NULL and data is NULL, an internal 32KiB block
@@ -5113,34 +4937,34 @@ CONTROLLING THE JIT STACK
return value must be a valid JIT stack, the result of calling
pcre2_jit_stack_create().
- A callback function is obeyed whenever JIT code is about to be run; it
+ A callback function is obeyed whenever JIT code is about to be run; it
is not obeyed when pcre2_match() is called with options that are incom-
- patible for JIT matching. A callback function can therefore be used to
- determine whether a match operation was executed by JIT or by the
+ patible for JIT matching. A callback function can therefore be used to
+ determine whether a match operation was executed by JIT or by the
interpreter.
You may safely use the same JIT stack for more than one pattern (either
- by assigning directly or by callback), as long as the patterns are
+ by assigning directly or by callback), as long as the patterns are
matched sequentially in the same thread. Currently, the only way to set
- up non-sequential matches in one thread is to use callouts: if a call-
- out function starts another match, that match must use a different JIT
+ up non-sequential matches in one thread is to use callouts: if a call-
+ out function starts another match, that match must use a different JIT
stack to the one used for currently suspended match(es).
- In a multithread application, if you do not specify a JIT stack, or if
- you assign or pass back NULL from a callback, that is thread-safe,
- because each thread has its own machine stack. However, if you assign
- or pass back a non-NULL JIT stack, this must be a different stack for
+ In a multithread application, if you do not specify a JIT stack, or if
+ you assign or pass back NULL from a callback, that is thread-safe,
+ because each thread has its own machine stack. However, if you assign
+ or pass back a non-NULL JIT stack, this must be a different stack for
each thread so that the application is thread-safe.
- Strictly speaking, even more is allowed. You can assign the same non-
- NULL stack to a match context that is used by any number of patterns,
- as long as they are not used for matching by multiple threads at the
- same time. For example, you could use the same stack in all compiled
- patterns, with a global mutex in the callback to wait until the stack
+ Strictly speaking, even more is allowed. You can assign the same non-
+ NULL stack to a match context that is used by any number of patterns,
+ as long as they are not used for matching by multiple threads at the
+ same time. For example, you could use the same stack in all compiled
+ patterns, with a global mutex in the callback to wait until the stack
is available for use. However, this is an inefficient solution, and not
recommended.
- This is a suggestion for how a multithreaded program that needs to set
+ This is a suggestion for how a multithreaded program that needs to set
up non-default JIT stacks might operate:
During thread initialization
@@ -5152,7 +4976,7 @@ CONTROLLING THE JIT STACK
Use a one-line callback function
return thread_local_var
- All the functions described in this section do nothing if JIT is not
+ All the functions described in this section do nothing if JIT is not
available.
@@ -5161,20 +4985,20 @@ JIT STACK FAQ
(1) Why do we need JIT stacks?
PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack
- where the local data of the current node is pushed before checking its
+ where the local data of the current node is pushed before checking its
child nodes. Allocating real machine stack on some platforms is diffi-
cult. For example, the stack chain needs to be updated every time if we
- extend the stack on PowerPC. Although it is possible, its updating
+ extend the stack on PowerPC. Although it is possible, its updating
time overhead decreases performance. So we do the recursion in memory.
(2) Why don't we simply allocate blocks of memory with malloc()?
- Modern operating systems have a nice feature: they can reserve an
+ Modern operating systems have a nice feature: they can reserve an
address space instead of allocating memory. We can safely allocate mem-
- ory pages inside this address space, so the stack could grow without
+ ory pages inside this address space, so the stack could grow without
moving memory data (this is important because of pointers). Thus we can
allocate 1MiB address space, and use only a single memory page (usually
- 4KiB) if that is enough. However, we can still grow up to 1MiB anytime
+ 4KiB) if that is enough. However, we can still grow up to 1MiB anytime
if needed.
(3) Who "owns" a JIT stack?
@@ -5182,8 +5006,8 @@ JIT STACK FAQ
The owner of the stack is the user program, not the JIT studied pattern
or anything else. The user program must ensure that if a stack is being
used by pcre2_match(), (that is, it is assigned to a match context that
- is passed to the pattern currently running), that stack must not be
- used by any other threads (to avoid overwriting the same memory area).
+ is passed to the pattern currently running), that stack must not be
+ used by any other threads (to avoid overwriting the same memory area).
The best practice for multithreaded programs is to allocate a stack for
each thread, and return this stack through the JIT callback function.
@@ -5191,36 +5015,36 @@ JIT STACK FAQ
You can free a JIT stack at any time, as long as it will not be used by
pcre2_match() again. When you assign the stack to a match context, only
- a pointer is set. There is no reference counting or any other magic.
+ a pointer is set. There is no reference counting or any other magic.
You can free compiled patterns, contexts, and stacks in any order, any-
- time. Just do not call pcre2_match() with a match context pointing to
+ time. Just do not call pcre2_match() with a match context pointing to
an already freed stack, as that will cause SEGFAULT. (Also, do not free
- a stack currently used by pcre2_match() in another thread). You can
- also replace the stack in a context at any time when it is not in use.
+ a stack currently used by pcre2_match() in another thread). You can
+ also replace the stack in a context at any time when it is not in use.
You should free the previous stack before assigning a replacement.
- (5) Should I allocate/free a stack every time before/after calling
+ (5) Should I allocate/free a stack every time before/after calling
pcre2_match()?
- No, because this is too costly in terms of resources. However, you
- could implement some clever idea which releases the stack if it is not
- used in let's say two minutes. The JIT callback can help to achieve
+ No, because this is too costly in terms of resources. However, you
+ could implement some clever idea which releases the stack if it is not
+ used in let's say two minutes. The JIT callback can help to achieve
this without keeping a list of patterns.
- (6) OK, the stack is for long term memory allocation. But what happens
- if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB
+ (6) OK, the stack is for long term memory allocation. But what happens
+ if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB
kept until the stack is freed?
- Especially on embedded systems, it might be a good idea to release mem-
- ory sometimes without freeing the stack. There is no API for this at
- the moment. Probably a function call which returns with the currently
- allocated memory for any stack and another which allows releasing mem-
+ Especially on embedded systems, it might be a good idea to release mem-
+ ory sometimes without freeing the stack. There is no API for this at
+ the moment. Probably a function call which returns with the currently
+ allocated memory for any stack and another which allows releasing mem-
ory (shrinking the stack) would be a good idea if someone needs this.
(7) This is too much of a headache. Isn't there any better solution for
JIT stack handling?
- No, thanks to Windows. If POSIX threads were used everywhere, we could
+ No, thanks to Windows. If POSIX threads were used everywhere, we could
throw out this complicated API.
@@ -5229,18 +5053,18 @@ FREEING JIT SPECULATIVE MEMORY
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
The JIT executable allocator does not free all memory when it is possi-
- ble. It expects new allocations, and keeps some free memory around to
- improve allocation speed. However, in low memory conditions, it might
- be better to free all possible memory. You can cause this to happen by
- calling pcre2_jit_free_unused_memory(). Its argument is a general con-
+ ble. It expects new allocations, and keeps some free memory around to
+ improve allocation speed. However, in low memory conditions, it might
+ be better to free all possible memory. You can cause this to happen by
+ calling pcre2_jit_free_unused_memory(). Its argument is a general con-
text, for custom memory management, or NULL for standard memory manage-
ment.
EXAMPLE CODE
- This is a single-threaded example that specifies a JIT stack without
- using a callback. A real program should include error checking after
+ This is a single-threaded example that specifies a JIT stack without
+ using a callback. A real program should include error checking after
all the function calls.
int rc;
@@ -5268,31 +5092,29 @@ EXAMPLE CODE
JIT FAST PATH API
Because the API described above falls back to interpreted matching when
- JIT is not available, it is convenient for programs that are written
+ JIT is not available, it is convenient for programs that are written
for general use in many environments. However, calling JIT via
pcre2_match() does have a performance impact. Programs that are written
- for use where JIT is known to be available, and which need the best
- possible performance, can instead use a "fast path" API to call JIT
- matching directly instead of calling pcre2_match() (obviously only for
+ for use where JIT is known to be available, and which need the best
+ possible performance, can instead use a "fast path" API to call JIT
+ matching directly instead of calling pcre2_match() (obviously only for
patterns that have been successfully processed by pcre2_jit_compile()).
- The fast path function is called pcre2_jit_match(), and it takes
- exactly the same arguments as pcre2_match(). However, the subject
- string must be specified with a length; PCRE2_ZERO_TERMINATED is not
- supported. Unsupported option bits (for example, PCRE2_ANCHORED,
- PCRE2_ENDANCHORED and PCRE2_COPY_MATCHED_SUBJECT) are ignored, as is
- the PCRE2_NO_JIT option. The return values are also the same as for
- pcre2_match(), plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (par-
- tial or complete) is requested that was not compiled.
+ The fast path function is called pcre2_jit_match(), and it takes
+ exactly the same arguments as pcre2_match(). The return values are also
+ the same, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or
+ complete) is requested that was not compiled. Unsupported option bits
+ (for example, PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT
+ option.
- When you call pcre2_match(), as well as testing for invalid options, a
+ When you call pcre2_match(), as well as testing for invalid options, a
number of other sanity checks are performed on the arguments. For exam-
ple, if the subject pointer is NULL, an immediate error is given. Also,
- unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
- validity. In the interests of speed, these checks do not happen on the
+ unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
+ validity. In the interests of speed, these checks do not happen on the
JIT fast path, and if invalid data is passed, the result is undefined.
- Bypassing the sanity checks and the pcre2_match() wrapping can give
+ Bypassing the sanity checks and the pcre2_match() wrapping can give
speedups of more than 10%.
@@ -5310,8 +5132,8 @@ AUTHOR
REVISION
- Last updated: 06 March 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 28 June 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -5352,22 +5174,23 @@ SIZE AND OTHER LIMITATIONS
The maximum length of a lookbehind assertion is 65535 characters.
- There is no limit to the number of parenthesized groups, but there can
- be no more than 65535 capture groups, and there is a limit to the depth
- of nesting of parenthesized subpatterns of all kinds. This is imposed
- in order to limit the amount of system stack used at compile time. The
- default limit can be specified when PCRE2 is built; if not, the default
- is set to 250. An application can change this limit by calling
- pcre2_set_parens_nest_limit() to set the limit in a compile context.
+ There is no limit to the number of parenthesized subpatterns, but there
+ can be no more than 65535 capturing subpatterns. There is, however, a
+ limit to the depth of nesting of parenthesized subpatterns of all
+ kinds. This is imposed in order to limit the amount of system stack
+ used at compile time. The default limit can be specified when PCRE2 is
+ built; if not, the default is set to 250. An application can change
+ this limit by calling pcre2_set_parens_nest_limit() to set the limit in
+ a compile context.
- The maximum length of name for a named capture group is 32 code units,
- and the maximum number of such groups is 10000.
+ The maximum length of name for a named subpattern is 32 code units, and
+ the maximum number of named subpatterns is 10000.
- The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or
- (*THEN) verb is 255 code units for the 8-bit library and 65535 code
+ The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or
+ (*THEN) verb is 255 code units for the 8-bit library and 65535 code
units for the 16-bit and 32-bit libraries.
- The maximum length of a string argument to a callout is the largest
+ The maximum length of a string argument to a callout is the largest
number a 32-bit unsigned integer can hold.
@@ -5380,8 +5203,8 @@ AUTHOR
REVISION
- Last updated: 02 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 30 March 2017
+ Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------
@@ -5505,8 +5328,8 @@ THE ALTERNATIVE MATCHING ALGORITHM
SESS option when compiling.
There are a number of features of PCRE2 regular expressions that are
- not supported or behave differently in the alternative matching func-
- tion. Those that are not supported cause an error if encountered.
+ not supported by the alternative matching algorithm. They are as fol-
+ lows:
1. Because the algorithm finds all possible matches, the greedy or
ungreedy nature of repetition quantifiers is not relevant (though it
@@ -5531,48 +5354,47 @@ THE ALTERNATIVE MATCHING ALGORITHM
strings are available.
3. Because no substrings are captured, backreferences within the pat-
- tern are not supported.
+ tern are not supported, and cause errors if encountered.
4. For the same reason, conditional expressions that use a backrefer-
ence as the condition or test for a specific group recursion are not
supported.
- 5. Again for the same reason, script runs are not supported.
-
- 6. Because many paths through the tree may be active, the \K escape
+ 5. Because many paths through the tree may be active, the \K escape
sequence, which resets the start of the match when encountered (but may
- be on some paths and not on others), is not supported.
+ be on some paths and not on others), is not supported. It causes an
+ error if encountered.
- 7. Callouts are supported, but the value of the capture_top field is
+ 6. Callouts are supported, but the value of the capture_top field is
always 1, and the value of the capture_last field is always 0.
- 8. The \C escape sequence, which (in the standard algorithm) always
- matches a single code unit, even in a UTF mode, is not supported in
- these modes, because the alternative algorithm moves through the sub-
- ject string one character (not code unit) at a time, for all active
+ 7. The \C escape sequence, which (in the standard algorithm) always
+ matches a single code unit, even in a UTF mode, is not supported in
+ these modes, because the alternative algorithm moves through the sub-
+ ject string one character (not code unit) at a time, for all active
paths through the tree.
- 9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE)
- are not supported. (*FAIL) is supported, and behaves like a failing
+ 8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE)
+ are not supported. (*FAIL) is supported, and behaves like a failing
negative assertion.
ADVANTAGES OF THE ALTERNATIVE ALGORITHM
- Using the alternative matching algorithm provides the following advan-
+ Using the alternative matching algorithm provides the following advan-
tages:
1. All possible matches (at a single point in the subject) are automat-
- ically found, and in particular, the longest match is found. To find
+ ically found, and in particular, the longest match is found. To find
more than one match using the standard algorithm, you have to do kludgy
things with callouts.
- 2. Because the alternative algorithm scans the subject string just
+ 2. Because the alternative algorithm scans the subject string just
once, and never needs to backtrack (except for lookbehinds), it is pos-
- sible to pass very long subject strings to the matching function in
+ sible to pass very long subject strings to the matching function in
several pieces, checking for partial matching each time. Although it is
- also possible to do multi-segment matching using the standard algo-
- rithm, by retaining partially matched substrings, it is more compli-
+ also possible to do multi-segment matching using the standard algo-
+ rithm, by retaining partially matched substrings, it is more compli-
cated. The pcre2partial documentation gives details of partial matching
and discusses multi-segment matching.
@@ -5581,12 +5403,11 @@ DISADVANTAGES OF THE ALTERNATIVE ALGORITHM
The alternative algorithm suffers from a number of disadvantages:
- 1. It is substantially slower than the standard algorithm. This is
- partly because it has to search for all possible matches, but is also
+ 1. It is substantially slower than the standard algorithm. This is
+ partly because it has to search for all possible matches, but is also
because it is less susceptible to optimization.
- 2. Capturing parentheses, backreferences, and script runs are not sup-
- ported.
+ 2. Capturing parentheses and backreferences are not supported.
3. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
@@ -5601,8 +5422,8 @@ AUTHOR
REVISION
- Last updated: 10 October 2018
- Copyright (c) 1997-2018 University of Cambridge.
+ Last updated: 29 September 2014
+ Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
@@ -6070,23 +5891,22 @@ PCRE2 REGULAR EXPRESSION DETAILS
great detail. This description of PCRE2's regular expressions is
intended as reference material.
- This document discusses the regular expression patterns that are sup-
- ported by PCRE2 when its main matching function, pcre2_match(), is
- used. PCRE2 also has an alternative matching function,
- pcre2_dfa_match(), which matches using a different algorithm that is
- not Perl-compatible. Some of the features discussed below are not
- available when DFA matching is used. The advantages and disadvantages
- of the alternative function, and how it differs from the normal func-
- tion, are discussed in the pcre2matching page.
+ This document discusses the patterns that are supported by PCRE2 when
+ its main matching function, pcre2_match(), is used. PCRE2 also has an
+ alternative matching function, pcre2_dfa_match(), which matches using a
+ different algorithm that is not Perl-compatible. Some of the features
+ discussed below are not available when DFA matching is used. The advan-
+ tages and disadvantages of the alternative function, and how it differs
+ from the normal function, are discussed in the pcre2matching page.
SPECIAL START-OF-PATTERN ITEMS
- A number of options that can be passed to pcre2_compile() can also be
+ A number of options that can be passed to pcre2_compile() can also be
set by special items at the start of a pattern. These are not Perl-com-
- patible, but are provided to make these options accessible to pattern
- writers who are not able to change the program that processes the pat-
- tern. Any number of these items may appear, but they must all be
+ patible, but are provided to make these options accessible to pattern
+ writers who are not able to change the program that processes the pat-
+ tern. Any number of these items may appear, but they must all be
together right at the start of the pattern string, and the letters must
be in upper case.
@@ -6094,86 +5914,86 @@ SPECIAL START-OF-PATTERN ITEMS
In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either
as single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32
- can be specified for the 32-bit library, in which case it constrains
- the character values to valid Unicode code points. To process UTF
- strings, PCRE2 must be built to include Unicode support (which is the
- default). When using UTF strings you must either call the compiling
- function with the PCRE2_UTF option, or the pattern must start with the
- special sequence (*UTF), which is equivalent to setting the relevant
+ can be specified for the 32-bit library, in which case it constrains
+ the character values to valid Unicode code points. To process UTF
+ strings, PCRE2 must be built to include Unicode support (which is the
+ default). When using UTF strings you must either call the compiling
+ function with the PCRE2_UTF option, or the pattern must start with the
+ special sequence (*UTF), which is equivalent to setting the relevant
option. How setting a UTF mode affects pattern matching is mentioned in
- several places below. There is also a summary of features in the
+ several places below. There is also a summary of features in the
pcre2unicode page.
Some applications that allow their users to supply patterns may wish to
- restrict them to non-UTF data for security reasons. If the
- PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not
+ restrict them to non-UTF data for security reasons. If the
+ PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not
allowed, and its appearance in a pattern causes an error.
Unicode property support
- Another special sequence that may appear at the start of a pattern is
- (*UCP). This has the same effect as setting the PCRE2_UCP option: it
- causes sequences such as \d and \w to use Unicode properties to deter-
+ Another special sequence that may appear at the start of a pattern is
+ (*UCP). This has the same effect as setting the PCRE2_UCP option: it
+ causes sequences such as \d and \w to use Unicode properties to deter-
mine character types, instead of recognizing only characters with codes
less than 256 via a lookup table.
Some applications that allow their users to supply patterns may wish to
- restrict them for security reasons. If the PCRE2_NEVER_UCP option is
+ restrict them for security reasons. If the PCRE2_NEVER_UCP option is
passed to pcre2_compile(), (*UCP) is not allowed, and its appearance in
a pattern causes an error.
Locking out empty string matching
Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same
- effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option
+ effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option
to whichever matching function is subsequently called to match the pat-
- tern. These options lock out the matching of empty strings, either
+ tern. These options lock out the matching of empty strings, either
entirely, or only at the start of the subject.
Disabling auto-possessification
- If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as
- setting the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making
- quantifiers possessive when what follows cannot match the repeated
+ If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as
+ setting the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making
+ quantifiers possessive when what follows cannot match the repeated
item. For example, by default a+b is treated as a++b. For more details,
see the pcre2api documentation.
Disabling start-up optimizations
- If a pattern starts with (*NO_START_OPT), it has the same effect as
+ If a pattern starts with (*NO_START_OPT), it has the same effect as
setting the PCRE2_NO_START_OPTIMIZE option. This disables several opti-
- mizations for quickly reaching "no match" results. For more details,
+ mizations for quickly reaching "no match" results. For more details,
see the pcre2api documentation.
Disabling automatic anchoring
- If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect
- as setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimiza-
+ If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect
+ as setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimiza-
tions that apply to patterns whose top-level branches all start with .*
- (match any number of arbitrary characters). For more details, see the
+ (match any number of arbitrary characters). For more details, see the
pcre2api documentation.
Disabling JIT compilation
- If a pattern that starts with (*NO_JIT) is successfully compiled, an
- attempt by the application to apply the JIT optimization by calling
+ If a pattern that starts with (*NO_JIT) is successfully compiled, an
+ attempt by the application to apply the JIT optimization by calling
pcre2_jit_compile() is ignored.
Setting match resource limits
The pcre2_match() function contains a counter that is incremented every
time it goes round its main loop. The caller of pcre2_match() can set a
- limit on this counter, which therefore limits the amount of computing
+ limit on this counter, which therefore limits the amount of computing
resource used for a match. The maximum depth of nested backtracking can
- also be limited; this indirectly restricts the amount of heap memory
- that is used, but there is also an explicit memory limit that can be
+ also be limited; this indirectly restricts the amount of heap memory
+ that is used, but there is also an explicit memory limit that can be
set.
- These facilities are provided to catch runaway matches that are pro-
- voked by patterns with huge matching trees. A common example is a pat-
- tern with nested unlimited repeats applied to a long string that does
- not match. When one of these limits is reached, pcre2_match() gives an
- error return. The limits can also be set by items at the start of the
+ These facilities are provided to catch runaway matches that are pro-
+ voked by patterns with huge matching trees (a typical example is a pat-
+ tern with nested unlimited repeats applied to a long string that does
+ not match). When one of these limits is reached, pcre2_match() gives an
+ error return. The limits can also be set by items at the start of the
pattern of the form
(*LIMIT_HEAP=d)
@@ -6181,35 +6001,35 @@ SPECIAL START-OF-PATTERN ITEMS
(*LIMIT_DEPTH=d)
where d is any number of decimal digits. However, the value of the set-
- ting must be less than the value set (or defaulted) by the caller of
- pcre2_match() for it to have any effect. In other words, the pattern
- writer can lower the limits set by the programmer, but not raise them.
- If there is more than one setting of one of these limits, the lower
- value is used. The heap limit is specified in kibibytes (units of 1024
+ ting must be less than the value set (or defaulted) by the caller of
+ pcre2_match() for it to have any effect. In other words, the pattern
+ writer can lower the limits set by the programmer, but not raise them.
+ If there is more than one setting of one of these limits, the lower
+ value is used. The heap limit is specified in kibibytes (units of 1024
bytes).
- Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
+ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
name is still recognized for backwards compatibility.
The heap limit applies only when the pcre2_match() or pcre2_dfa_match()
interpreters are used for matching. It does not apply to JIT. The match
- limit is used (but in a different way) when JIT is being used, or when
+ limit is used (but in a different way) when JIT is being used, or when
pcre2_dfa_match() is called, to limit computing resource usage by those
- matching functions. The depth limit is ignored by JIT but is relevant
- for DFA matching, which uses function recursion for recursions within
- the pattern and for lookaround assertions and atomic groups. In this
+ matching functions. The depth limit is ignored by JIT but is relevant
+ for DFA matching, which uses function recursion for recursions within
+ the pattern and for lookaround assertions and atomic groups. In this
case, the depth limit controls the depth of such recursion.
Newline conventions
- PCRE2 supports six different conventions for indicating line breaks in
- strings: a single CR (carriage return) character, a single LF (line-
+ PCRE2 supports six different conventions for indicating line breaks in
+ strings: a single CR (carriage return) character, a single LF (line-
feed) character, the two-character sequence CRLF, any of the three pre-
- ceding, any Unicode newline sequence, or the NUL character (binary
- zero). The pcre2api page has further discussion about newlines, and
+ ceding, any Unicode newline sequence, or the NUL character (binary
+ zero). The pcre2api page has further discussion about newlines, and
shows how to set the newline convention when calling pcre2_compile().
- It is also possible to specify a newline convention by starting a pat-
+ It is also possible to specify a newline convention by starting a pat-
tern string with one of the following sequences:
(*CR) carriage return
@@ -6220,7 +6040,7 @@ SPECIAL START-OF-PATTERN ITEMS
(*NUL) the NUL character (binary zero)
These override the default and the options given to the compiling func-
- tion. For example, on a Unix system where LF is the default newline
+ tion. For example, on a Unix system where LF is the default newline
sequence, the pattern
(*CR)a.b
@@ -6229,39 +6049,39 @@ SPECIAL START-OF-PATTERN ITEMS
no longer a newline. If more than one of these settings is present, the
last one is used.
- The newline convention affects where the circumflex and dollar asser-
+ The newline convention affects where the circumflex and dollar asser-
tions are true. It also affects the interpretation of the dot metachar-
- acter when PCRE2_DOTALL is not set, and the behaviour of \N when not
- followed by an opening brace. However, it does not affect what the \R
- escape sequence matches. By default, this is any Unicode newline
+ acter when PCRE2_DOTALL is not set, and the behaviour of \N when not
+ followed by an opening brace. However, it does not affect what the \R
+ escape sequence matches. By default, this is any Unicode newline
sequence, for Perl compatibility. However, this can be changed; see the
next section and the description of \R in the section entitled "Newline
- sequences" below. A change of \R setting can be combined with a change
+ sequences" below. A change of \R setting can be combined with a change
of newline convention.
Specifying what \R matches
It is possible to restrict \R to match only CR, LF, or CRLF (instead of
- the complete set of Unicode line endings) by setting the option
- PCRE2_BSR_ANYCRLF at compile time. This effect can also be achieved by
- starting a pattern with (*BSR_ANYCRLF). For completeness, (*BSR_UNI-
+ the complete set of Unicode line endings) by setting the option
+ PCRE2_BSR_ANYCRLF at compile time. This effect can also be achieved by
+ starting a pattern with (*BSR_ANYCRLF). For completeness, (*BSR_UNI-
CODE) is also recognized, corresponding to PCRE2_BSR_UNICODE.
EBCDIC CHARACTER CODES
- PCRE2 can be compiled to run in an environment that uses EBCDIC as its
- character code instead of ASCII or Unicode (typically a mainframe sys-
- tem). In the sections below, character code values are ASCII or Uni-
+ PCRE2 can be compiled to run in an environment that uses EBCDIC as its
+ character code instead of ASCII or Unicode (typically a mainframe sys-
+ tem). In the sections below, character code values are ASCII or Uni-
code; in an EBCDIC environment these characters may have different code
values, and there are no code points greater than 255.
CHARACTERS AND METACHARACTERS
- A regular expression is a pattern that is matched against a subject
- string from left to right. Most characters stand for themselves in a
- pattern, and match the corresponding characters in the subject. As a
+ A regular expression is a pattern that is matched against a subject
+ string from left to right. Most characters stand for themselves in a
+ pattern, and match the corresponding characters in the subject. As a
trivial example, the pattern
The quick brown fox
@@ -6270,11 +6090,10 @@ CHARACTERS AND METACHARACTERS
caseless matching is specified (the PCRE2_CASELESS option), letters are
matched independently of case.
- The power of regular expressions comes from the ability to include wild
- cards, character classes, alternatives, and repetitions in the pattern.
- These are encoded in the pattern by the use of metacharacters, which do
- not stand for themselves but instead are interpreted in some special
- way.
+ The power of regular expressions comes from the ability to include
+ alternatives and repetitions in the pattern. These are encoded in the
+ pattern by the use of metacharacters, which do not stand for themselves
+ but instead are interpreted in some special way.
There are two different sets of metacharacters: those that are recog-
nized anywhere in the pattern except within square brackets, and those
@@ -6287,11 +6106,14 @@ CHARACTERS AND METACHARACTERS
. match any character except newline (by default)
[ start character class definition
| start of alternative branch
- ( start group or control verb
- ) end group or control verb
+ ( start subpattern
+ ) end subpattern
+ ? extends the meaning of (
+ also 0 or 1 quantifier
+ also quantifier minimizer
* 0 or more quantifier
- + 1 or more quantifier; also "possessive quantifier"
- ? 0 or 1 quantifier; also quantifier minimizer
+ + 1 or more quantifier
+ also "possessive quantifier"
{ start min/max quantifier
Part of a pattern that is in square brackets is called a "character
@@ -6300,7 +6122,8 @@ CHARACTERS AND METACHARACTERS
\ general escape character
^ negate the class, but only if the first character
- indicates character range
- [ POSIX character class (if followed by POSIX syntax)
+ [ POSIX character class (only if followed by POSIX
+ syntax)
] terminates the character class
The following sections describe the use of each of the metacharacters.
@@ -6309,7 +6132,7 @@ CHARACTERS AND METACHARACTERS
BACKSLASH
The backslash character has several uses. Firstly, if it is followed by
- a character that is not a digit or a letter, it takes away any special
+ a character that is not a number or a letter, it takes away any special
meaning that character may have. This use of backslash as an escape
character applies both inside and outside character classes.
@@ -6320,7 +6143,7 @@ BACKSLASH
that it stands for itself. In particular, if you want to match a back-
slash, you write \\.
- In a UTF mode, only ASCII digits and letters have any special meaning
+ In a UTF mode, only ASCII numbers and letters have any special meaning
after a backslash. All other characters (in particular, those whose
code points are greater than 127) are treated as literals.
@@ -6330,14 +6153,14 @@ BACKSLASH
are ignored. An escaping backslash can be used to include a white space
or # character as part of the pattern.
- If you want to treat all characters in a sequence as literals, you can
- do so by putting them between \Q and \E. This is different from Perl in
- that $ and @ are handled as literals in \Q...\E sequences in PCRE2,
- whereas in Perl, $ and @ cause variable interpolation. Also, Perl does
- "double-quotish backslash interpolation" on any backslashes between \Q
- and \E which, its documentation says, "may lead to confusing results".
- PCRE2 treats a backslash between \Q and \E just like any other charac-
- ter. Note the following examples:
+ If you want to remove the special meaning from a sequence of charac-
+ ters, you can do so by putting them between \Q and \E. This is differ-
+ ent from Perl in that $ and @ are handled as literals in \Q...\E
+ sequences in PCRE2, whereas in Perl, $ and @ cause variable interpola-
+ tion. Also, Perl does "double-quotish backslash interpolation" on any
+ backslashes between \Q and \E which, its documentation says, "may lead
+ to confusing results". PCRE2 treats a backslash between \Q and \E just
+ like any other character. Note the following examples:
Pattern PCRE2 matches Perl matches
@@ -6362,16 +6185,15 @@ BACKSLASH
acters in patterns in a visible manner. There is no restriction on the
appearance of non-printing characters in a pattern, but when a pattern
is being prepared by text editing, it is often easier to use one of the
- following escape sequences instead of the binary character it repre-
- sents. In an ASCII or Unicode environment, these escapes are as fol-
- lows:
+ following escape sequences than the binary character it represents. In
+ an ASCII or Unicode environment, these escapes are as follows:
\a alarm, that is, the BEL character (hex 07)
\cx "control-x", where x is any printable ASCII character
\e escape (hex 1B)
\f form feed (hex 0C)
\n linefeed (hex 0A)
- \r carriage return (hex 0D) (but see below)
+ \r carriage return (hex 0D)
\t tab (hex 09)
\0dd character with octal code 0dd
\ddd character with octal code ddd, or backreference
@@ -6379,45 +6201,15 @@ BACKSLASH
\xhh character with hex code hh
\x{hhh..} character with hex code hhh..
\N{U+hhh..} character with Unicode hex code point hhh..
+ \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
- By default, after \x that is not followed by {, from zero to two hexa-
- decimal digits are read (letters can be in upper or lower case). Any
- number of hexadecimal digits may appear between \x{ and }. If a charac-
- ter other than a hexadecimal digit appears between \x{ and }, or if
- there is no terminating }, an error occurs.
-
- Characters whose code points are less than 256 can be defined by either
- of the two syntaxes for \x or by an octal sequence. There is no differ-
- ence in the way they are handled. For example, \xdc is exactly the same
- as \x{dc} or \334. However, using the braced versions does make such
- sequences easier to read.
-
- Support is available for some ECMAScript (aka JavaScript) escape
- sequences via two compile-time options. If PCRE2_ALT_BSUX is set, the
- sequence \x followed by { is not recognized. Only if \x is followed by
- two hexadecimal digits is it recognized as a character escape. Other-
- wise it is interpreted as a literal "x" character. In this mode, sup-
- port for code points greater than 255 is provided by \u, which must be
- followed by four hexadecimal digits; otherwise it is interpreted as a
- literal "u" character.
-
- PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in
- addition, \u{hhh..} is recognized as the character specified by hexa-
- decimal code point. There may be any number of hexadecimal digits.
- This syntax is from ECMAScript 6.
-
- The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF
+ The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF
option is set, that is, when PCRE2 is operating in a Unicode mode. Perl
- also uses \N{name} to specify characters by Unicode name; PCRE2 does
- not support this. Note that when \N is not followed by an opening
- brace (curly bracket) it has an entirely different meaning, matching
+ also uses \N{name} to specify characters by Unicode name; PCRE2 does
+ not support this. Note that when \N is not followed by an opening
+ brace (curly bracket) it has an entirely different meaning, matching
any character that is not a newline.
- There are some legacy applications where the escape sequence \r is
- expected to match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option
- is set, \r in a pattern is converted to \n so that it matches a LF
- (linefeed) instead of a CR (carriage return) character.
-
The precise effect of \cx on ASCII characters is as follows: if x is a
lower case letter, it is converted to upper case. Then bit 6 of the
character (hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A
@@ -6470,11 +6262,11 @@ BACKSLASH
Outside a character class, PCRE2 reads the digit and any following dig-
its as a decimal number. If the number is less than 10, begins with the
- digit 8 or 9, or if there are at least that many previous capture
- groups in the expression, the entire sequence is taken as a backrefer-
- ence. A description of how this works is given later, following the
- discussion of parenthesized groups. Otherwise, up to three octal dig-
- its are read to form a character code.
+ digit 8 or 9, or if there are at least that many previous capturing
+ left parentheses in the expression, the entire sequence is taken as a
+ backreference. A description of how this works is given later, follow-
+ ing the discussion of parenthesized subpatterns. Otherwise, up to
+ three octal digits are read to form a character code.
Inside a character class, PCRE2 handles \8 and \9 as the literal char-
acters "8" and "9", and otherwise reads up to three octal digits fol-
@@ -6484,7 +6276,7 @@ BACKSLASH
\040 is another way of writing an ASCII space
\40 is the same, provided there are fewer than 40
- previous capture groups
+ previous capturing subpatterns
\7 is always a backreference
\11 might be a backreference, or another way of
writing a tab
@@ -6500,6 +6292,24 @@ BACKSLASH
syntax must not be introduced by a leading zero, because no more than
three octal digits are ever read.
+ By default, after \x that is not followed by {, from zero to two hexa-
+ decimal digits are read (letters can be in upper or lower case). Any
+ number of hexadecimal digits may appear between \x{ and }. If a charac-
+ ter other than a hexadecimal digit appears between \x{ and }, or if
+ there is no terminating }, an error occurs.
+
+ If the PCRE2_ALT_BSUX option is set, the interpretation of \x is as
+ just described only when it is followed by two hexadecimal digits. Oth-
+ erwise, it matches a literal "x" character. In this mode, support for
+ code points greater than 255 is provided by \u, which must be followed
+ by four hexadecimal digits; otherwise it matches a literal "u" charac-
+ ter.
+
+ Characters whose value is less than 256 can be defined by either of the
+ two syntaxes for \x (or by \u in PCRE2_ALT_BSUX mode). There is no dif-
+ ference in the way they are handled. For example, \xdc is exactly the
+ same as \x{dc} (or \u00dc in PCRE2_ALT_BSUX mode).
+
Constraints on character values
Characters that are specified using octal or hexadecimal numbers are
@@ -6532,24 +6342,23 @@ BACKSLASH
In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its
string handler and used to modify the case of following characters. By
- default, PCRE2 does not support these escape sequences in patterns.
- However, if either of the PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX
- options is set, \U matches a "U" character, and \u can be used to
- define a character by code point, as described above.
+ default, PCRE2 does not support these escape sequences. However, if the
+ PCRE2_ALT_BSUX option is set, \U matches a "U" character, and \u can be
+ used to define a character by code point, as described above.
Absolute and relative backreferences
- The sequence \g followed by a signed or unsigned number, optionally
- enclosed in braces, is an absolute or relative backreference. A named
- backreference can be coded as \g{name}. Backreferences are discussed
- later, following the discussion of parenthesized groups.
+ The sequence \g followed by a signed or unsigned number, optionally
+ enclosed in braces, is an absolute or relative backreference. A named
+ backreference can be coded as \g{name}. Backreferences are discussed
+ later, following the discussion of parenthesized subpatterns.
Absolute and relative subroutine calls
- For compatibility with Oniguruma, the non-Perl syntax \g followed by a
+ For compatibility with Oniguruma, the non-Perl syntax \g followed by a
name or a number enclosed either in angle brackets or single quotes, is
- an alternative syntax for referencing a capture group as a subroutine.
- Details are discussed later. Note that \g{...} (Perl syntax) and
+ an alternative syntax for referencing a subpattern as a "subroutine".
+ Details are discussed later. Note that \g{...} (Perl syntax) and
\g<...> (Oniguruma syntax) are not synonymous. The former is a backref-
erence; the latter is a subroutine call.
@@ -6569,58 +6378,58 @@ BACKSLASH
\w any "word" character
\W any "non-word" character
- The \N escape sequence has the same meaning as the "." metacharacter
- when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change
+ The \N escape sequence has the same meaning as the "." metacharacter
+ when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change
the meaning of \N. Note that when \N is followed by an opening brace it
has a different meaning. See the section entitled "Non-printing charac-
- ters" above for details. Perl also uses \N{name} to specify characters
+ ters" above for details. Perl also uses \N{name} to specify characters
by Unicode name; PCRE2 does not support this.
- Each pair of lower and upper case escape sequences partitions the com-
- plete set of characters into two disjoint sets. Any given character
- matches one, and only one, of each pair. The sequences can appear both
- inside and outside character classes. They each match one character of
- the appropriate type. If the current matching point is at the end of
- the subject string, all of them fail, because there is no character to
+ Each pair of lower and upper case escape sequences partitions the com-
+ plete set of characters into two disjoint sets. Any given character
+ matches one, and only one, of each pair. The sequences can appear both
+ inside and outside character classes. They each match one character of
+ the appropriate type. If the current matching point is at the end of
+ the subject string, all of them fail, because there is no character to
match.
- The default \s characters are HT (9), LF (10), VT (11), FF (12), CR
- (13), and space (32), which are defined as white space in the "C"
+ The default \s characters are HT (9), LF (10), VT (11), FF (12), CR
+ (13), and space (32), which are defined as white space in the "C"
locale. This list may vary if locale-specific matching is taking place.
- For example, in some locales the "non-breaking space" character (\xA0)
+ For example, in some locales the "non-breaking space" character (\xA0)
is recognized as white space, and in others the VT character is not.
- A "word" character is an underscore or any character that is a letter
- or digit. By default, the definition of letters and digits is con-
+ A "word" character is an underscore or any character that is a letter
+ or digit. By default, the definition of letters and digits is con-
trolled by PCRE2's low-valued character tables, and may vary if locale-
specific matching is taking place (see "Locale support" in the pcre2api
- page). For example, in a French locale such as "fr_FR" in Unix-like
- systems, or "french" in Windows, some character codes greater than 127
- are used for accented letters, and these are then matched by \w. The
+ page). For example, in a French locale such as "fr_FR" in Unix-like
+ systems, or "french" in Windows, some character codes greater than 127
+ are used for accented letters, and these are then matched by \w. The
use of locales with Unicode is discouraged.
- By default, characters whose code points are greater than 127 never
+ By default, characters whose code points are greater than 127 never
match \d, \s, or \w, and always match \D, \S, and \W, although this may
- be different for characters in the range 128-255 when locale-specific
- matching is happening. These escape sequences retain their original
- meanings from before Unicode support was available, mainly for effi-
- ciency reasons. If the PCRE2_UCP option is set, the behaviour is
- changed so that Unicode properties are used to determine character
+ be different for characters in the range 128-255 when locale-specific
+ matching is happening. These escape sequences retain their original
+ meanings from before Unicode support was available, mainly for effi-
+ ciency reasons. If the PCRE2_UCP option is set, the behaviour is
+ changed so that Unicode properties are used to determine character
types, as follows:
\d any character that matches \p{Nd} (decimal digit)
\s any character that matches \p{Z} or \h or \v
\w any character that matches \p{L} or \p{N}, plus underscore
- The upper case escapes match the inverse sets of characters. Note that
- \d matches only decimal digits, whereas \w matches any Unicode digit,
+ The upper case escapes match the inverse sets of characters. Note that
+ \d matches only decimal digits, whereas \w matches any Unicode digit,
as well as any Unicode letter, and underscore. Note also that PCRE2_UCP
- affects \b, and \B because they are defined in terms of \w and \W.
+ affects \b and \B because they are defined in terms of \w and \W.
Matching these sequences is noticeably slower when PCRE2_UCP is set.
- The sequences \h, \H, \v, and \V, in contrast to the other sequences,
- which match only ASCII characters by default, always match a specific
- list of code points, whether or not PCRE2_UCP is set. The horizontal
+ The sequences \h, \H, \v, and \V, in contrast to the other sequences,
+ which match only ASCII characters by default, always match a specific
+ list of code points, whether or not PCRE2_UCP is set. The horizontal
space characters are:
U+0009 Horizontal tab (HT)
@@ -6653,36 +6462,36 @@ BACKSLASH
U+2028 Line separator
U+2029 Paragraph separator
- In 8-bit, non-UTF-8 mode, only the characters with code points less
+ In 8-bit, non-UTF-8 mode, only the characters with code points less
than 256 are relevant.
Newline sequences
- Outside a character class, by default, the escape sequence \R matches
- any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent
+ Outside a character class, by default, the escape sequence \R matches
+ any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent
to the following:
(?>\r\n|\n|\x0b|\f|\r|\x85)
- This is an example of an "atomic group", details of which are given
+ This is an example of an "atomic group", details of which are given
below. This particular group matches either the two-character sequence
- CR followed by LF, or one of the single characters LF (linefeed,
- U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (car-
- riage return, U+000D), or NEL (next line, U+0085). Because this is an
- atomic group, the two-character sequence is treated as a single unit
+ CR followed by LF, or one of the single characters LF (linefeed,
+ U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (car-
+ riage return, U+000D), or NEL (next line, U+0085). Because this is an
+ atomic group, the two-character sequence is treated as a single unit
that cannot be split.
In other modes, two additional characters whose code points are greater
than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa-
- rator, U+2029). Unicode support is not needed for these characters to
+ rator, U+2029). Unicode support is not needed for these characters to
be recognized.
It is possible to restrict \R to match only CR, LF, or CRLF (instead of
- the complete set of Unicode line endings) by setting the option
- PCRE2_BSR_ANYCRLF at compile time. (BSR is an abbreviation for "back-
+ the complete set of Unicode line endings) by setting the option
+ PCRE2_BSR_ANYCRLF at compile time. (BSR is an abbreviation for "back-
slash R".) This can be made the default when PCRE2 is built; if this is
- the case, the other behaviour can be requested via the PCRE2_BSR_UNI-
- CODE option. It is also possible to specify these settings by starting
+ the case, the other behaviour can be requested via the PCRE2_BSR_UNI-
+ CODE option. It is also possible to specify these settings by starting
a pattern string with one of the following sequences:
(*BSR_ANYCRLF) CR, LF, or CRLF only
@@ -6690,77 +6499,72 @@ BACKSLASH
These override the default and the options given to the compiling func-
tion. Note that these special settings, which are not Perl-compatible,
- are recognized only at the very start of a pattern, and that they must
- be in upper case. If more than one of them is present, the last one is
- used. They can be combined with a change of newline convention; for
+ are recognized only at the very start of a pattern, and that they must
+ be in upper case. If more than one of them is present, the last one is
+ used. They can be combined with a change of newline convention; for
example, a pattern can start with:
(*ANY)(*BSR_ANYCRLF)
- They can also be combined with the (*UTF) or (*UCP) special sequences.
- Inside a character class, \R is treated as an unrecognized escape
+ They can also be combined with the (*UTF) or (*UCP) special sequences.
+ Inside a character class, \R is treated as an unrecognized escape
sequence, and causes an error.
Unicode character properties
- When PCRE2 is built with Unicode support (the default), three addi-
- tional escape sequences that match characters with specific properties
- are available. They can be used in any mode, though in 8-bit and 16-bit
- non-UTF modes these sequences are of course limited to testing charac-
- ters whose code points are less than U+0100 and U+10000, respectively.
- In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode
- limit) may be encountered. These are all treated as being in the
- Unknown script and with an unassigned type. The extra escape sequences
- are:
+ When PCRE2 is built with Unicode support (the default), three addi-
+ tional escape sequences that match characters with specific properties
+ are available. In 8-bit non-UTF-8 mode, these sequences are of course
+ limited to testing characters whose code points are less than 256, but
+ they do work in this mode. In 32-bit non-UTF mode, code points greater
+ than 0x10ffff (the Unicode limit) may be encountered. These are all
+ treated as being in the Common script and with an unassigned type. The
+ extra escape sequences are:
\p{xx} a character with the xx property
\P{xx} a character without the xx property
\X a Unicode extended grapheme cluster
- The property names represented by xx above are case-sensitive. There is
- support for Unicode script names, Unicode general category properties,
- "Any", which matches any character (including newline), and some spe-
- cial PCRE2 properties (described in the next section). Other Perl
- properties such as "InMusicalSymbols" are not supported by PCRE2. Note
- that \P{Any} does not match any characters, so always causes a match
- failure.
+ The property names represented by xx above are limited to the Unicode
+ script names, the general category properties, "Any", which matches any
+ character (including newline), and some special PCRE2 properties
+ (described in the next section). Other Perl properties such as "InMu-
+ sicalSymbols" are not supported by PCRE2. Note that \P{Any} does not
+ match any characters, so always causes a match failure.
Sets of Unicode characters are defined as belonging to certain scripts.
- A character from one of these sets can be matched using a script name.
+ A character from one of these sets can be matched using a script name.
For example:
\p{Greek}
\P{Han}
- Unassigned characters (and in non-UTF 32-bit mode, characters with code
- points greater than 0x10FFFF) are assigned the "Unknown" script. Others
- that are not part of an identified script are lumped together as "Com-
- mon". The current list of scripts is:
-
- Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
- nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
- Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
- nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
- Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
- Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek,
- Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
- Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
- Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan-
- nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao,
- Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha-
- jani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
+ Those that are not part of an identified script are lumped together as
+ "Common". The current list of scripts is:
+
+ Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
+ nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
+ Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
+ nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
+ Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
+ Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek,
+ Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
+ Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
+ Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan-
+ nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao,
+ Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha-
+ jani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
Medefaidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive,
- Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
- Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
- ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
- dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya,
+ Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
+ Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
+ ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
+ dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya,
Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
- Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
- vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
- Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
- Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
- nagh, Tirhuta, Ugaritic, Unknown, Vai, Warang_Citi, Yi, Zan-
- abazar_Square.
+ Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
+ vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
+ Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
+ Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
+ nagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi, Zanabazar_Square.
Each character has exactly one Unicode general category property, spec-
ified by a two-letter abbreviation. For compatibility with Perl, nega-
@@ -6826,13 +6630,11 @@ BACKSLASH
has the Lu, Ll, or Lt property, in other words, a letter that is not
classified as a modifier or "other".
- The Cs (Surrogate) property applies only to characters whose code
- points are in the range U+D800 to U+DFFF. These characters are no dif-
- ferent to any other character when PCRE2 is not in UTF mode (using the
- 16-bit or 32-bit library). However, they are not valid in Unicode
- strings and so cannot be tested by PCRE2 in UTF mode, unless UTF valid-
- ity checking has been turned off (see the discussion of
- PCRE2_NO_UTF_CHECK in the pcre2api page).
+ The Cs (Surrogate) property applies only to characters in the range
+ U+D800 to U+DFFF. Such characters are not valid in Unicode strings and
+ so cannot be tested by PCRE2, unless UTF validity checking has been
+ turned off (see the discussion of PCRE2_NO_UTF_CHECK in the pcre2api
+ page). Perl does not support the Cs property.
The long synonyms for property names that Perl supports (such as
\p{Letter}) are not supported by PCRE2, nor is it permitted to prefix
@@ -6973,8 +6775,8 @@ BACKSLASH
The final use of backslash is for certain simple assertions. An asser-
tion specifies a condition that has to be met at a particular point in
a match, without consuming any characters from the subject string. The
- use of groups for more complicated assertions is described below. The
- backslashed assertions are:
+ use of subpatterns for more complicated assertions is described below.
+ The backslashed assertions are:
\b matches at a word boundary
\B matches when not at a word boundary
@@ -6991,184 +6793,183 @@ BACKSLASH
A word boundary is a position in the subject string where the current
character and the previous character do not both match \w or \W (i.e.
one matches \w and the other matches \W), or the start or end of the
- string if the first or last character matches \w, respectively. When
- PCRE2 is built with Unicode support, the meanings of \w and \W can be
- changed by setting the PCRE2_UCP option. When this is done, it also
- affects \b and \B. Neither PCRE2 nor Perl has a separate "start of
- word" or "end of word" metasequence. However, whatever follows \b nor-
- mally determines which it is. For example, the fragment \ba matches "a"
- at the start of a word.
-
- The \A, \Z, and \z assertions differ from the traditional circumflex
+ string if the first or last character matches \w, respectively. In a
+ UTF mode, the meanings of \w and \W can be changed by setting the
+ PCRE2_UCP option. When this is done, it also affects \b and \B. Neither
+ PCRE2 nor Perl has a separate "start of word" or "end of word" metase-
+ quence. However, whatever follows \b normally determines which it is.
+ For example, the fragment \ba matches "a" at the start of a word.
+
+ The \A, \Z, and \z assertions differ from the traditional circumflex
and dollar (described in the next section) in that they only ever match
- at the very start and end of the subject string, whatever options are
- set. Thus, they are independent of multiline mode. These three asser-
- tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options,
- which affect only the behaviour of the circumflex and dollar metachar-
- acters. However, if the startoffset argument of pcre2_match() is non-
- zero, indicating that matching is to start at a point other than the
- beginning of the subject, \A can never match. The difference between
- \Z and \z is that \Z matches before a newline at the end of the string
+ at the very start and end of the subject string, whatever options are
+ set. Thus, they are independent of multiline mode. These three asser-
+ tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options,
+ which affect only the behaviour of the circumflex and dollar metachar-
+ acters. However, if the startoffset argument of pcre2_match() is non-
+ zero, indicating that matching is to start at a point other than the
+ beginning of the subject, \A can never match. The difference between
+ \Z and \z is that \Z matches before a newline at the end of the string
as well as at the very end, whereas \z matches only at the end.
- The \G assertion is true only when the current matching position is at
- the start point of the matching process, as specified by the startoff-
- set argument of pcre2_match(). It differs from \A when the value of
- startoffset is non-zero. By calling pcre2_match() multiple times with
- appropriate arguments, you can mimic Perl's /g option, and it is in
+ The \G assertion is true only when the current matching position is at
+ the start point of the matching process, as specified by the startoff-
+ set argument of pcre2_match(). It differs from \A when the value of
+ startoffset is non-zero. By calling pcre2_match() multiple times with
+ appropriate arguments, you can mimic Perl's /g option, and it is in
this kind of implementation where \G can be useful.
- Note, however, that PCRE2's implementation of \G, being true at the
- starting character of the matching process, is subtly different from
- Perl's, which defines it as true at the end of the previous match. In
- Perl, these can be different when the previously matched string was
+ Note, however, that PCRE2's implementation of \G, being true at the
+ starting character of the matching process, is subtly different from
+ Perl's, which defines it as true at the end of the previous match. In
+ Perl, these can be different when the previously matched string was
empty. Because PCRE2 does just one match at a time, it cannot reproduce
this behaviour.
- If all the alternatives of a pattern begin with \G, the expression is
+ If all the alternatives of a pattern begin with \G, the expression is
anchored to the starting match position, and the "anchored" flag is set
in the compiled regular expression.
CIRCUMFLEX AND DOLLAR
- The circumflex and dollar metacharacters are zero-width assertions.
- That is, they test for a particular condition being true without con-
+ The circumflex and dollar metacharacters are zero-width assertions.
+ That is, they test for a particular condition being true without con-
suming any characters from the subject string. These two metacharacters
- are concerned with matching the starts and ends of lines. If the new-
- line convention is set so that only the two-character sequence CRLF is
- recognized as a newline, isolated CR and LF characters are treated as
+ are concerned with matching the starts and ends of lines. If the new-
+ line convention is set so that only the two-character sequence CRLF is
+ recognized as a newline, isolated CR and LF characters are treated as
ordinary data characters, and are not recognized as newlines.
Outside a character class, in the default matching mode, the circumflex
- character is an assertion that is true only if the current matching
- point is at the start of the subject string. If the startoffset argu-
- ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum-
- flex can never match if the PCRE2_MULTILINE option is unset. Inside a
- character class, circumflex has an entirely different meaning (see
+ character is an assertion that is true only if the current matching
+ point is at the start of the subject string. If the startoffset argu-
+ ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum-
+ flex can never match if the PCRE2_MULTILINE option is unset. Inside a
+ character class, circumflex has an entirely different meaning (see
below).
- Circumflex need not be the first character of the pattern if a number
- of alternatives are involved, but it should be the first thing in each
- alternative in which it appears if the pattern is ever to match that
- branch. If all possible alternatives start with a circumflex, that is,
- if the pattern is constrained to match only at the start of the sub-
- ject, it is said to be an "anchored" pattern. (There are also other
+ Circumflex need not be the first character of the pattern if a number
+ of alternatives are involved, but it should be the first thing in each
+ alternative in which it appears if the pattern is ever to match that
+ branch. If all possible alternatives start with a circumflex, that is,
+ if the pattern is constrained to match only at the start of the sub-
+ ject, it is said to be an "anchored" pattern. (There are also other
constructs that can cause a pattern to be anchored.)
- The dollar character is an assertion that is true only if the current
- matching point is at the end of the subject string, or immediately
- before a newline at the end of the string (by default), unless
+ The dollar character is an assertion that is true only if the current
+ matching point is at the end of the subject string, or immediately
+ before a newline at the end of the string (by default), unless
PCRE2_NOTEOL is set. Note, however, that it does not actually match the
newline. Dollar need not be the last character of the pattern if a num-
ber of alternatives are involved, but it should be the last item in any
- branch in which it appears. Dollar has no special meaning in a charac-
+ branch in which it appears. Dollar has no special meaning in a charac-
ter class.
- The meaning of dollar can be changed so that it matches only at the
- very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at
+ The meaning of dollar can be changed so that it matches only at the
+ very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at
compile time. This does not affect the \Z assertion.
The meanings of the circumflex and dollar metacharacters are changed if
- the PCRE2_MULTILINE option is set. When this is the case, a dollar
- character matches before any newlines in the string, as well as at the
- very end, and a circumflex matches immediately after internal newlines
- as well as at the start of the subject string. It does not match after
- a newline that ends the string, for compatibility with Perl. However,
+ the PCRE2_MULTILINE option is set. When this is the case, a dollar
+ character matches before any newlines in the string, as well as at the
+ very end, and a circumflex matches immediately after internal newlines
+ as well as at the start of the subject string. It does not match after
+ a newline that ends the string, for compatibility with Perl. However,
this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option.
- For example, the pattern /^abc$/ matches the subject string "def\nabc"
- (where \n represents a newline) in multiline mode, but not otherwise.
- Consequently, patterns that are anchored in single line mode because
- all branches start with ^ are not anchored in multiline mode, and a
- match for circumflex is possible when the startoffset argument of
- pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored
+ For example, the pattern /^abc$/ matches the subject string "def\nabc"
+ (where \n represents a newline) in multiline mode, but not otherwise.
+ Consequently, patterns that are anchored in single line mode because
+ all branches start with ^ are not anchored in multiline mode, and a
+ match for circumflex is possible when the startoffset argument of
+ pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored
if PCRE2_MULTILINE is set.
- When the newline convention (see "Newline conventions" below) recog-
- nizes the two-character sequence CRLF as a newline, this is preferred,
- even if the single characters CR and LF are also recognized as new-
- lines. For example, if the newline convention is "any", a multiline
- mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather
- than after CR, even though CR on its own is a valid newline. (It also
+ When the newline convention (see "Newline conventions" below) recog-
+ nizes the two-character sequence CRLF as a newline, this is preferred,
+ even if the single characters CR and LF are also recognized as new-
+ lines. For example, if the newline convention is "any", a multiline
+ mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather
+ than after CR, even though CR on its own is a valid newline. (It also
matches at the very start of the string, of course.)
- Note that the sequences \A, \Z, and \z can be used to match the start
- and end of the subject in both modes, and if all branches of a pattern
- start with \A it is always anchored, whether or not PCRE2_MULTILINE is
+ Note that the sequences \A, \Z, and \z can be used to match the start
+ and end of the subject in both modes, and if all branches of a pattern
+ start with \A it is always anchored, whether or not PCRE2_MULTILINE is
set.
FULL STOP (PERIOD, DOT) AND \N
Outside a character class, a dot in the pattern matches any one charac-
- ter in the subject string except (by default) a character that signi-
+ ter in the subject string except (by default) a character that signi-
fies the end of a line.
- When a line ending is defined as a single character, dot never matches
- that character; when the two-character sequence CRLF is used, dot does
- not match CR if it is immediately followed by LF, but otherwise it
- matches all characters (including isolated CRs and LFs). When any Uni-
- code line endings are being recognized, dot does not match CR or LF or
+ When a line ending is defined as a single character, dot never matches
+ that character; when the two-character sequence CRLF is used, dot does
+ not match CR if it is immediately followed by LF, but otherwise it
+ matches all characters (including isolated CRs and LFs). When any Uni-
+ code line endings are being recognized, dot does not match CR or LF or
any of the other line ending characters.
- The behaviour of dot with regard to newlines can be changed. If the
- PCRE2_DOTALL option is set, a dot matches any one character, without
- exception. If the two-character sequence CRLF is present in the sub-
+ The behaviour of dot with regard to newlines can be changed. If the
+ PCRE2_DOTALL option is set, a dot matches any one character, without
+ exception. If the two-character sequence CRLF is present in the sub-
ject string, it takes two dots to match it.
- The handling of dot is entirely independent of the handling of circum-
- flex and dollar, the only relationship being that they both involve
+ The handling of dot is entirely independent of the handling of circum-
+ flex and dollar, the only relationship being that they both involve
newlines. Dot has no special meaning in a character class.
- The escape sequence \N when not followed by an opening brace behaves
- like a dot, except that it is not affected by the PCRE2_DOTALL option.
- In other words, it matches any character except one that signifies the
+ The escape sequence \N when not followed by an opening brace behaves
+ like a dot, except that it is not affected by the PCRE2_DOTALL option.
+ In other words, it matches any character except one that signifies the
end of a line.
When \N is followed by an opening brace it has a different meaning. See
- the section entitled "Non-printing characters" above for details. Perl
- also uses \N{name} to specify characters by Unicode name; PCRE2 does
+ the section entitled "Non-printing characters" above for details. Perl
+ also uses \N{name} to specify characters by Unicode name; PCRE2 does
not support this.
MATCHING A SINGLE CODE UNIT
- Outside a character class, the escape sequence \C matches any one code
- unit, whether or not a UTF mode is set. In the 8-bit library, one code
- unit is one byte; in the 16-bit library it is a 16-bit unit; in the
- 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches
- line-ending characters. The feature is provided in Perl in order to
+ Outside a character class, the escape sequence \C matches any one code
+ unit, whether or not a UTF mode is set. In the 8-bit library, one code
+ unit is one byte; in the 16-bit library it is a 16-bit unit; in the
+ 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches
+ line-ending characters. The feature is provided in Perl in order to
match individual bytes in UTF-8 mode, but it is unclear how it can use-
fully be used.
- Because \C breaks up characters into individual code units, matching
- one unit with \C in UTF-8 or UTF-16 mode means that the rest of the
- string may start with a malformed UTF character. This has undefined
+ Because \C breaks up characters into individual code units, matching
+ one unit with \C in UTF-8 or UTF-16 mode means that the rest of the
+ string may start with a malformed UTF character. This has undefined
results, because PCRE2 assumes that it is matching character by charac-
- ter in a valid UTF string (by default it checks the subject string's
- validity at the start of processing unless the PCRE2_NO_UTF_CHECK
+ ter in a valid UTF string (by default it checks the subject string's
+ validity at the start of processing unless the PCRE2_NO_UTF_CHECK
option is used).
- An application can lock out the use of \C by setting the
- PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also
+ An application can lock out the use of \C by setting the
+ PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also
possible to build PCRE2 with the use of \C permanently disabled.
- PCRE2 does not allow \C to appear in lookbehind assertions (described
- below) in UTF-8 or UTF-16 modes, because this would make it impossible
- to calculate the length of the lookbehind. Neither the alternative
+ PCRE2 does not allow \C to appear in lookbehind assertions (described
+ below) in UTF-8 or UTF-16 modes, because this would make it impossible
+ to calculate the length of the lookbehind. Neither the alternative
matching function pcre2_dfa_match() nor the JIT optimizer supports \C in
these UTF modes. The former gives a match-time error; the latter fails
to optimize and so the match is always run using the interpreter.
- In the 32-bit library, however, \C is always supported (when not
- explicitly locked out) because it always matches a single code unit,
+ In the 32-bit library, however, \C is always supported (when not
+ explicitly locked out) because it always matches a single code unit,
whether or not UTF-32 is specified.
In general, the \C escape sequence is best avoided. However, one way of
- using it that avoids the problem of malformed UTF-8 or UTF-16 charac-
- ters is to use a lookahead to check the length of the next character,
- as in this pattern, which could be used with a UTF-8 string (ignore
+ using it that avoids the problem of malformed UTF-8 or UTF-16 charac-
+ ters is to use a lookahead to check the length of the next character,
+ as in this pattern, which could be used with a UTF-8 string (ignore
white space and line breaks):
(?| (?=[\x00-\x7f])(\C) |
@@ -7176,11 +6977,11 @@ MATCHING A SINGLE CODE UNIT
(?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
(?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
- In this example, a group that starts with (?| resets the capturing
- parentheses numbers in each alternative (see "Duplicate Group Numbers"
- below). The assertions at the start of each branch check the next UTF-8
- character for values whose encoding uses 1, 2, 3, or 4 bytes, respec-
- tively. The character's individual bytes are then captured by the
+ In this example, a group that starts with (?| resets the capturing
+ parentheses numbers in each alternative (see "Duplicate Subpattern Num-
+ bers" below). The assertions at the start of each branch check the next
+ UTF-8 character for values whose encoding uses 1, 2, 3, or 4 bytes,
+ respectively. The character's individual bytes are then captured by the
appropriate number of \C groups.
@@ -7188,115 +6989,115 @@ SQUARE BRACKETS AND CHARACTER CLASSES
An opening square bracket introduces a character class, terminated by a
closing square bracket. A closing square bracket on its own is not spe-
- cial by default. If a closing square bracket is required as a member
+ cial by default. If a closing square bracket is required as a member
of the class, it should be the first data character in the class (after
- an initial circumflex, if present) or escaped with a backslash. This
- means that, by default, an empty class cannot be defined. However, if
- the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at
+ an initial circumflex, if present) or escaped with a backslash. This
+ means that, by default, an empty class cannot be defined. However, if
+ the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at
the start does end the (empty) class.
- A character class matches a single character in the subject. A matched
+ A character class matches a single character in the subject. A matched
character must be in the set of characters defined by the class, unless
- the first character in the class definition is a circumflex, in which
+ the first character in the class definition is a circumflex, in which
case the subject character must not be in the set defined by the class.
- If a circumflex is actually required as a member of the class, ensure
+ If a circumflex is actually required as a member of the class, ensure
it is not the first character, or escape it with a backslash.
- For example, the character class [aeiou] matches any lower case vowel,
- while [^aeiou] matches any character that is not a lower case vowel.
+ For example, the character class [aeiou] matches any lower case vowel,
+ while [^aeiou] matches any character that is not a lower case vowel.
Note that a circumflex is just a convenient notation for specifying the
- characters that are in the class by enumerating those that are not. A
- class that starts with a circumflex is not an assertion; it still con-
- sumes a character from the subject string, and therefore it fails if
+ characters that are in the class by enumerating those that are not. A
+ class that starts with a circumflex is not an assertion; it still con-
+ sumes a character from the subject string, and therefore it fails if
the current pointer is at the end of the string.
- Characters in a class may be specified by their code points using \o,
- \x, or \N{U+hh..} in the usual way. When caseless matching is set, any
- letters in a class represent both their upper case and lower case ver-
- sions, so for example, a caseless [aeiou] matches "A" as well as "a",
- and a caseless [^aeiou] does not match "A", whereas a caseful version
+ Characters in a class may be specified by their code points using \o,
+ \x, or \N{U+hh..} in the usual way. When caseless matching is set, any
+ letters in a class represent both their upper case and lower case ver-
+ sions, so for example, a caseless [aeiou] matches "A" as well as "a",
+ and a caseless [^aeiou] does not match "A", whereas a caseful version
would.
- Characters that might indicate line breaks are never treated in any
- special way when matching character classes, whatever line-ending
- sequence is in use, and whatever setting of the PCRE2_DOTALL and
- PCRE2_MULTILINE options is used. A class such as [^a] always matches
+ Characters that might indicate line breaks are never treated in any
+ special way when matching character classes, whatever line-ending
+ sequence is in use, and whatever setting of the PCRE2_DOTALL and
+ PCRE2_MULTILINE options is used. A class such as [^a] always matches
one of these characters.
The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s,
- \S, \v, \V, \w, and \W may appear in a character class, and add the
- characters that they match to the class. For example, [\dABCDEF]
- matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option
- affects the meanings of \d, \s, \w and their upper case partners, just
- as it does when they appear outside a character class, as described in
- the section entitled "Generic character types" above. The escape
- sequence \b has a different meaning inside a character class; it
- matches the backspace character. The sequences \B, \R, and \X are not
- special inside a character class. Like any other unrecognized escape
- sequences, they cause an error. The same is true for \N when not fol-
+ \S, \v, \V, \w, and \W may appear in a character class, and add the
+ characters that they match to the class. For example, [\dABCDEF]
+ matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option
+ affects the meanings of \d, \s, \w and their upper case partners, just
+ as it does when they appear outside a character class, as described in
+ the section entitled "Generic character types" above. The escape
+ sequence \b has a different meaning inside a character class; it
+ matches the backspace character. The sequences \B, \R, and \X are not
+ special inside a character class. Like any other unrecognized escape
+ sequences, they cause an error. The same is true for \N when not fol-
lowed by an opening brace.
- The minus (hyphen) character can be used to specify a range of charac-
- ters in a character class. For example, [d-m] matches any letter
- between d and m, inclusive. If a minus character is required in a
- class, it must be escaped with a backslash or appear in a position
- where it cannot be interpreted as indicating a range, typically as the
+ The minus (hyphen) character can be used to specify a range of charac-
+ ters in a character class. For example, [d-m] matches any letter
+ between d and m, inclusive. If a minus character is required in a
+ class, it must be escaped with a backslash or appear in a position
+ where it cannot be interpreted as indicating a range, typically as the
first or last character in the class, or immediately after a range. For
- example, [b-d-z] matches letters in the range b to d, a hyphen charac-
+ example, [b-d-z] matches letters in the range b to d, a hyphen charac-
ter, or z.
Perl treats a hyphen as a literal if it appears before or after a POSIX
class (see below) or before or after a character type escape such as
- \d or \H. However, unless the hyphen is the last character in the
- class, Perl outputs a warning in its warning mode, as this is most
- likely a user error. As PCRE2 has no facility for warning, an error is
+ \d or \H. However, unless the hyphen is the last character in the
+ class, Perl outputs a warning in its warning mode, as this is most
+ likely a user error. As PCRE2 has no facility for warning, an error is
given in these cases.
It is not possible to have the literal character "]" as the end charac-
- ter of a range. A pattern such as [W-]46] is interpreted as a class of
- two characters ("W" and "-") followed by a literal string "46]", so it
- would match "W46]" or "-46]". However, if the "]" is escaped with a
- backslash it is interpreted as the end of range, so [W-\]46] is inter-
- preted as a class containing a range followed by two other characters.
- The octal or hexadecimal representation of "]" can also be used to end
+ ter of a range. A pattern such as [W-]46] is interpreted as a class of
+ two characters ("W" and "-") followed by a literal string "46]", so it
+ would match "W46]" or "-46]". However, if the "]" is escaped with a
+ backslash it is interpreted as the end of range, so [W-\]46] is inter-
+ preted as a class containing a range followed by two other characters.
+ The octal or hexadecimal representation of "]" can also be used to end
a range.
Ranges normally include all code points between the start and end char-
- acters, inclusive. They can also be used for code points specified
+ acters, inclusive. They can also be used for code points specified
numerically, for example [\000-\037]. Ranges can include any characters
- that are valid for the current mode. In any UTF mode, the so-called
- "surrogate" characters (those whose code points lie between 0xd800 and
- 0xdfff inclusive) may not be specified explicitly by default (the
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How-
+ that are valid for the current mode. In any UTF mode, the so-called
+ "surrogate" characters (those whose code points lie between 0xd800 and
+ 0xdfff inclusive) may not be specified explicitly by default (the
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How-
ever, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates,
are always permitted.
- There is a special case in EBCDIC environments for ranges whose end
+ There is a special case in EBCDIC environments for ranges whose end
points are both specified as literal letters in the same case. For com-
- patibility with Perl, EBCDIC code points within the range that are not
- letters are omitted. For example, [h-k] matches only four characters,
+ patibility with Perl, EBCDIC code points within the range that are not
+ letters are omitted. For example, [h-k] matches only four characters,
even though the codes for h and k are 0x88 and 0x92, a range of 11 code
- points. However, if the range is specified numerically, for example,
+ points. However, if the range is specified numerically, for example,
[\x88-\x92] or [h-\x92], all code points are included.
If a range that includes letters is used when caseless matching is set,
it matches the letters in either case. For example, [W-c] is equivalent
- to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
- character tables for a French locale are in use, [\xc8-\xcb] matches
+ to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
+ character tables for a French locale are in use, [\xc8-\xcb] matches
accented E characters in both cases.
- A circumflex can conveniently be used with the upper case character
- types to specify a more restricted set of characters than the matching
- lower case type. For example, the class [^\W_] matches any letter or
+ A circumflex can conveniently be used with the upper case character
+ types to specify a more restricted set of characters than the matching
+ lower case type. For example, the class [^\W_] matches any letter or
digit, but not underscore, whereas [\w] includes underscore. A positive
character class should be read as "something OR something OR ..." and a
negative class as "NOT something AND NOT something AND NOT ...".
- The only metacharacters that are recognized in character classes are
- backslash, hyphen (only where it can be interpreted as specifying a
- range), circumflex (only at the start), opening square bracket (only
- when it can be interpreted as introducing a POSIX class name, or for a
- special compatibility feature - see the next two sections), and the
+ The only metacharacters that are recognized in character classes are
+ backslash, hyphen (only where it can be interpreted as specifying a
+ range), circumflex (only at the start), opening square bracket (only
+ when it can be interpreted as introducing a POSIX class name, or for a
+ special compatibility feature - see the next two sections), and the
terminating closing square bracket. However, escaping other non-
alphanumeric characters does no harm.
@@ -7304,7 +7105,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES
POSIX CHARACTER CLASSES
Perl supports the POSIX notation for character classes. This uses names
- enclosed by [: and :] within the enclosing square brackets. PCRE2 also
+ enclosed by [: and :] within the enclosing square brackets. PCRE2 also
supports this notation. For example,
[01[:alpha:]%]
@@ -7327,13 +7128,13 @@ POSIX CHARACTER CLASSES
word "word" characters (same as \w)
xdigit hexadecimal digits
- The default "space" characters are HT (9), LF (10), VT (11), FF (12),
- CR (13), and space (32). If locale-specific matching is taking place,
- the list of space characters may be different; there may be fewer or
+ The default "space" characters are HT (9), LF (10), VT (11), FF (12),
+ CR (13), and space (32). If locale-specific matching is taking place,
+ the list of space characters may be different; there may be fewer or
more of them. "Space" and \s match the same set of characters.
- The name "word" is a Perl extension, and "blank" is a GNU extension
- from Perl 5.8. Another Perl extension is negation, which is indicated
+ The name "word" is a Perl extension, and "blank" is a GNU extension
+ from Perl 5.8. Another Perl extension is negation, which is indicated
by a ^ character after the colon. For example,
[12[:^digit:]]
@@ -7344,9 +7145,9 @@ POSIX CHARACTER CLASSES
By default, characters with values greater than 127 do not match any of
the POSIX character classes, although this may be different for charac-
- ters in the range 128-255 when locale-specific matching is happening.
- However, if the PCRE2_UCP option is passed to pcre2_compile(), some of
- the classes are changed so that Unicode character properties are used.
+ ters in the range 128-255 when locale-specific matching is happening.
+ However, if the PCRE2_UCP option is passed to pcre2_compile(), some of
+ the classes are changed so that Unicode character properties are used.
This is achieved by replacing certain POSIX classes with other
sequences, as follows:
@@ -7360,10 +7161,10 @@ POSIX CHARACTER CLASSES
[:upper:] becomes \p{Lu}
[:word:] becomes \p{Xwd}
- Negated versions, such as [:^alpha:] use \P instead of \p. Three other
+ Negated versions, such as [:^alpha:] use \P instead of \p. Three other
POSIX classes are handled specially in UCP mode:
- [:graph:] This matches characters that have glyphs that mark the page
+ [:graph:] This matches characters that have glyphs that mark the page
when printed. In Unicode property terms, it matches all char-
acters with the L, M, N, P, S, or Cf properties, except for:
@@ -7372,60 +7173,60 @@ POSIX CHARACTER CLASSES
U+2066 - U+2069 Various "isolate"s
- [:print:] This matches the same characters as [:graph:] plus space
- characters that are not controls, that is, characters with
+ [:print:] This matches the same characters as [:graph:] plus space
+ characters that are not controls, that is, characters with
the Zs property.
[:punct:] This matches all characters that have the Unicode P (punctua-
- tion) property, plus those characters with code points less
+ tion) property, plus those characters with code points less
than 256 that have the S (Symbol) property.
- The other POSIX classes are unchanged, and match only characters with
+ The other POSIX classes are unchanged, and match only characters with
code points less than 256.
COMPATIBILITY FEATURE FOR WORD BOUNDARIES
- In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
- ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
+ In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
+ ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
and "end of word". PCRE2 treats these items as follows:
[[:<:]] is converted to \b(?=\w)
[[:>:]] is converted to \b(?<=\w)
Only these exact character sequences are recognized. A sequence such as
- [a[:<:]b] provokes an error for an unrecognized POSIX class name. This
- support is not compatible with Perl. It is provided to help migrations
+ [a[:<:]b] provokes an error for an unrecognized POSIX class name. This
+ support is not compatible with Perl. It is provided to help migrations
from other environments, and is best not used in any new patterns. Note
- that \b matches at the start and the end of a word (see "Simple asser-
- tions" above), and in a Perl-style pattern the preceding or following
- character normally shows which is wanted, without the need for the
- assertions that are used above in order to give exactly the POSIX be-
+ that \b matches at the start and the end of a word (see "Simple asser-
+ tions" above), and in a Perl-style pattern the preceding or following
+ character normally shows which is wanted, without the need for the
+ assertions that are used above in order to give exactly the POSIX be-
haviour.
VERTICAL BAR
- Vertical bar characters are used to separate alternative patterns. For
+ Vertical bar characters are used to separate alternative patterns. For
example, the pattern
gilbert|sullivan
- matches either "gilbert" or "sullivan". Any number of alternatives may
- appear, and an empty alternative is permitted (matching the empty
+ matches either "gilbert" or "sullivan". Any number of alternatives may
+ appear, and an empty alternative is permitted (matching the empty
string). The matching process tries each alternative in turn, from left
- to right, and the first one that succeeds is used. If the alternatives
- are within a group (defined below), "succeeds" means matching the rest
- of the main pattern as well as the alternative in the group.
+ to right, and the first one that succeeds is used. If the alternatives
+ are within a subpattern (defined below), "succeeds" means matching the
+ rest of the main pattern as well as the alternative in the subpattern.
INTERNAL OPTION SETTING
- The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
- PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options
- can be changed from within the pattern by a sequence of letters
- enclosed between "(?" and ")". These options are Perl-compatible, and
- are described in detail in the pcre2api documentation. The option let-
+ The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
+ PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options
+ can be changed from within the pattern by a sequence of letters
+ enclosed between "(?" and ")". These options are Perl-compatible, and
+ are described in detail in the pcre2api documentation. The option let-
ters are:
i for PCRE2_CASELESS
@@ -7436,73 +7237,72 @@ INTERNAL OPTION SETTING
xx for PCRE2_EXTENDED_MORE
For example, (?im) sets caseless, multiline matching. It is also possi-
- ble to unset these options by preceding the relevant letters with a
+ ble to unset these options by preceding the relevant letters with a
hyphen, for example (?-im). The two "extended" options are not indepen-
dent; unsetting either one cancels the effects of both of them.
- A combined setting and unsetting such as (?im-sx), which sets
- PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and
- PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the
- options string. If a letter appears both before and after the hyphen,
- the option is unset. An empty options setting "(?)" is allowed. Need-
+ A combined setting and unsetting such as (?im-sx), which sets
+ PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and
+ PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the
+ options string. If a letter appears both before and after the hyphen,
+ the option is unset. An empty options setting "(?)" is allowed. Need-
less to say, it has no effect.
- If the first character following (? is a circumflex, it causes all of
- the above options to be unset. Thus, (?^) is equivalent to (?-imnsx).
- Letters may follow the circumflex to cause some options to be re-
+ If the first character following (? is a circumflex, it causes all of
+ the above options to be unset. Thus, (?^) is equivalent to (?-imnsx).
+ Letters may follow the circumflex to cause some options to be re-
instated, but a hyphen may not appear.
- The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be
- changed in the same way as the Perl-compatible options by using the
+ The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be
+ changed in the same way as the Perl-compatible options by using the
characters J and U respectively. However, these are not unset by (?^).
- When one of these option changes occurs at top level (that is, not
- inside group parentheses), the change applies to the remainder of the
- pattern that follows. An option change within a group (see below for a
- description of groups) affects only that part of the group that follows
- it, so
+ When one of these option changes occurs at top level (that is, not
+ inside subpattern parentheses), the change applies to the remainder of
+ the pattern that follows. An option change within a subpattern (see
+ below for a description of subpatterns) affects only that part of the
+ subpattern that follows it, so
(a(?i)b)c
- matches abc and aBc and no other strings (assuming PCRE2_CASELESS is
- not used). By this means, options can be made to have different set-
+ matches abc and aBc and no other strings (assuming PCRE2_CASELESS is
+ not used). By this means, options can be made to have different set-
tings in different parts of the pattern. Any changes made in one alter-
- native do carry on into subsequent branches within the same group. For
- example,
+ native do carry on into subsequent branches within the same subpattern.
+ For example,
(a(?i)b|c)
- matches "ab", "aB", "c", and "C", even though when matching "C" the
- first branch is abandoned before the option setting. This is because
- the effects of option settings happen at compile time. There would be
+ matches "ab", "aB", "c", and "C", even though when matching "C" the
+ first branch is abandoned before the option setting. This is because
+ the effects of option settings happen at compile time. There would be
some very weird behaviour otherwise.
- As a convenient shorthand, if any option settings are required at the
- start of a non-capturing group (see the next section), the option let-
- ters may appear between the "?" and the ":". Thus the two patterns
+ As a convenient shorthand, if any option settings are required at the
+ start of a non-capturing subpattern (see the next section), the option
+ letters may appear between the "?" and the ":". Thus the two patterns
(?i:saturday|sunday)
(?:(?i)saturday|sunday)
match exactly the same set of strings.
- Note: There are other PCRE2-specific options, applying to the whole
- pattern, which can be set by the application when the compiling func-
- tion is called. In addition, the pattern can contain special leading
- sequences such as (*CRLF) to override what the application has set or
- what has been defaulted. Details are given in the section entitled
- "Newline sequences" above. There are also the (*UTF) and (*UCP) leading
- sequences that can be used to set UTF and Unicode property modes; they
- are equivalent to setting the PCRE2_UTF and PCRE2_UCP options, respec-
- tively. However, the application can set the PCRE2_NEVER_UTF and
- PCRE2_NEVER_UCP options, which lock out the use of the (*UTF) and
- (*UCP) sequences.
+ Note: There are other PCRE2-specific options that can be set by the
+ application when the compiling function is called. The pattern can con-
+ tain special leading sequences such as (*CRLF) to override what the
+ application has set or what has been defaulted. Details are given in
+ the section entitled "Newline sequences" above. There are also the
+ (*UTF) and (*UCP) leading sequences that can be used to set UTF and
+ Unicode property modes; they are equivalent to setting the PCRE2_UTF
+ and PCRE2_UCP options, respectively. However, the application can set
+ the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP options, which lock out the use
+ of the (*UTF) and (*UCP) sequences.
-GROUPS
+SUBPATTERNS
- Groups are delimited by parentheses (round brackets), which can be
- nested. Turning part of a pattern into a group does two things:
+ Subpatterns are delimited by parentheses (round brackets), which can be
+ nested. Turning part of a pattern into a subpattern does two things:
1. It localizes a set of alternatives. For example, the pattern
@@ -7511,53 +7311,55 @@ GROUPS
matches "cataract", "caterpillar", or "cat". Without the parentheses,
it would match "cataract", "erpillar" or an empty string.
- 2. It creates a "capture group". This means that, when the whole pat-
- tern matches, the portion of the subject string that matched the group
- is passed back to the caller, separately from the portion that matched
- the whole pattern. (This applies only to the traditional matching
- function; the DFA matching function does not support capturing.)
+ 2. It sets up the subpattern as a capturing subpattern. This means
+ that, when the whole pattern matches, the portion of the subject string
+ that matched the subpattern is passed back to the caller, separately
+ from the portion that matched the whole pattern. (This applies only to
+ the traditional matching function; the DFA matching function does not
+ support capturing.)
Opening parentheses are counted from left to right (starting from 1) to
- obtain numbers for capture groups. For example, if the string "the red
- king" is matched against the pattern
+ obtain numbers for the capturing subpatterns. For example, if the
+ string "the red king" is matched against the pattern
the ((red|white) (king|queen))
the captured substrings are "red king", "red", and "king", and are num-
bered 1, 2, and 3, respectively.
- The fact that plain parentheses fulfil two functions is not always
- helpful. There are often times when grouping is required without cap-
- turing. If an opening parenthesis is followed by a question mark and a
- colon, the group does not do any capturing, and is not counted when
- computing the number of any subsequent capture groups. For example, if
- the string "the white queen" is matched against the pattern
+ The fact that plain parentheses fulfil two functions is not always
+ helpful. There are often times when a grouping subpattern is required
+ without a capturing requirement. If an opening parenthesis is followed
+ by a question mark and a colon, the subpattern does not do any captur-
+ ing, and is not counted when computing the number of any subsequent
+ capturing subpatterns. For example, if the string "the white queen" is
+ matched against the pattern
the ((?:red|white) (king|queen))
the captured substrings are "white queen" and "queen", and are numbered
- 1 and 2. The maximum number of capture groups is 65535.
+ 1 and 2. The maximum number of capturing subpatterns is 65535.
As a convenient shorthand, if any option settings are required at the
- start of a non-capturing group, the option letters may appear between
- the "?" and the ":". Thus the two patterns
+ start of a non-capturing subpattern, the option letters may appear
+ between the "?" and the ":". Thus the two patterns
(?i:saturday|sunday)
(?:(?i)saturday|sunday)
match exactly the same set of strings. Because alternative branches are
tried from left to right, and options are not reset until the end of
- the group is reached, an option setting in one branch does affect sub-
- sequent branches, so the above patterns match "SUNDAY" as well as "Sat-
- urday".
+ the subpattern is reached, an option setting in one branch does affect
+ subsequent branches, so the above patterns match "SUNDAY" as well as
+ "Saturday".
-DUPLICATE GROUP NUMBERS
+DUPLICATE SUBPATTERN NUMBERS
- Perl 5.10 introduced a feature whereby each alternative in a group uses
- the same numbers for its capturing parentheses. Such a group starts
- with (?| and is itself a non-capturing group. For example, consider
- this pattern:
+ Perl 5.10 introduced a feature whereby each alternative in a subpattern
+ uses the same numbers for its capturing parentheses. Such a subpattern
+ starts with (?| and is itself a non-capturing subpattern. For example,
+ consider this pattern:
(?|(Sat)ur|(Sun))day
@@ -7568,7 +7370,7 @@ DUPLICATE GROUP NUMBERS
not all, of one of a number of alternatives. Inside a (?| group, paren-
theses are numbered as usual, but the number is reset at the start of
each branch. The numbers of any capturing parentheses that follow the
- whole group start after the highest number used in any branch. The fol-
+ subpattern start after the highest number used in any branch. The fol-
lowing example is taken from the Perl documentation. The numbers under-
neath show in which buffer the captured content will be stored.
@@ -7576,94 +7378,90 @@ DUPLICATE GROUP NUMBERS
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
- A backreference to a capture group uses the most recent value that is
- set for the group. The following pattern matches "abcabc" or "defdef":
+ A backreference to a numbered subpattern uses the most recent value
+ that is set for that number by any subpattern. The following pattern
+ matches "abcabc" or "defdef":
/(?|(abc)|(def))\1/
- In contrast, a subroutine call to a capture group always refers to the
- first one in the pattern with the given number. The following pattern
- matches "abcabc" or "defabc":
+ In contrast, a subroutine call to a numbered subpattern always refers
+ to the first one in the pattern with the given number. The following
+ pattern matches "abcabc" or "defabc":
/(?|(abc)|(def))(?1)/
A relative reference such as (?-1) is no different: it is just a conve-
nient way of computing an absolute group number.
- If a condition test for a group's having matched refers to a non-unique
- number, the test is true if any group with that number has matched.
+ If a condition test for a subpattern's having matched refers to a non-
+ unique number, the test is true if any of the subpatterns of that num-
+ ber have matched.
An alternative approach to using this "branch reset" feature is to use
- duplicate named groups, as described in the next section.
+ duplicate named subpatterns, as described in the next section.
-NAMED CAPTURE GROUPS
+NAMED SUBPATTERNS
- Identifying capture groups by number is simple, but it can be very hard
- to keep track of the numbers in complicated patterns. Furthermore, if
- an expression is modified, the numbers may change. To help with this
- difficulty, PCRE2 supports the naming of capture groups. This feature
- was not added to Perl until release 5.10. Python had the feature ear-
- lier, and PCRE1 introduced it at release 4.0, using the Python syntax.
- PCRE2 supports both the Perl and the Python syntax.
+ Identifying capturing parentheses by number is simple, but it can be
+ very hard to keep track of the numbers in complicated patterns. Fur-
+ thermore, if an expression is modified, the numbers may change. To help
+ with this difficulty, PCRE2 supports the naming of capturing subpat-
+ terns. This feature was not added to Perl until release 5.10. Python
+ had the feature earlier, and PCRE1 introduced it at release 4.0, using
+ the Python syntax. PCRE2 supports both the Perl and the Python syntax.
- In PCRE2, a capture group can be named in one of three ways:
+ In PCRE2, a capturing subpattern can be named in one of three ways:
(?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python.
- Names may be up to 32 code units long. When PCRE2_UTF is not set, they
- may contain only ASCII alphanumeric characters and underscores, but
- must start with a non-digit. When PCRE2_UTF is set, the syntax of group
- names is extended to allow any Unicode letter or Unicode decimal digit.
- In other words, group names must match one of these patterns:
-
- ^[_A-Za-z][_A-Za-z0-9]*\z when PCRE2_UTF is not set
- ^[_\p{L}][_\p{L}\p{Nd}]*\z when PCRE2_UTF is set
-
- References to capture groups from other parts of the pattern, such as
- backreferences, recursion, and conditions, can all be made by name as
- well as by number.
-
- Named capture groups are allocated numbers as well as names, exactly as
- if the names were not present. In both PCRE2 and Perl, capture groups
- are primarily identified by numbers; any names are just aliases for
- these numbers. The PCRE2 API provides function calls for extracting the
- complete name-to-number translation table from a compiled pattern, as
- well as convenience functions for extracting captured substrings by
- name.
-
- Warning: When more than one capture group has the same number, as
- described in the previous section, a name given to one of them applies
- to all of them. Perl allows identically numbered groups to have differ-
- ent names. Consider this pattern, where there are two capture groups,
- both numbered 1:
+ Names consist of up to 32 alphanumeric characters and underscores, but
+ must start with a non-digit. References to capturing parentheses from
+ other parts of the pattern, such as backreferences, recursion, and con-
+ ditions, can all be made by name as well as by number.
+
+ Named capturing parentheses are allocated numbers as well as names,
+ exactly as if the names were not present. In both PCRE2 and Perl, cap-
+ turing subpatterns are primarily identified by numbers; any names are
+ just aliases for these numbers. The PCRE2 API provides function calls
+ for extracting the complete name-to-number translation table from a
+ compiled pattern, as well as convenience functions for extracting cap-
+ tured substrings by name.
+
+ Warning: When more than one subpattern has the same number, as
+ described in the previous section, a name given to one of them applies
+ to all of them. Perl allows identically numbered subpatterns to have
+ different names. Consider this pattern, where there are two capturing
+ subpatterns, both numbered 1:
(?|(?<AA>aa)|(?<BB>bb))
- Perl allows this, with both names AA and BB as aliases of group 1.
+ Perl allows this, with both names AA and BB as aliases of group 1.
Thus, after a successful match, both names yield the same value (either
"aa" or "bb").
- In an attempt to reduce confusion, PCRE2 does not allow the same group
+ In an attempt to reduce confusion, PCRE2 does not allow the same group
number to be associated with more than one name. The example above pro-
- vokes a compile-time error. However, there is still scope for confu-
+ vokes a compile-time error. However, there is still scope for confu-
sion. Consider this pattern:
(?|(?<AA>aa)|(bb))
- Although the second group number 1 is not explicitly named, the name AA
- is still an alias for any group 1. Whether the pattern matches "aa" or
- "bb", a reference by name to group AA yields the matched string.
+ Although the second subpattern number 1 is not explicitly named, the
+ name AA is still an alias for subpattern 1. Whether the pattern matches
+ "aa" or "bb", a reference by name to group AA yields the matched
+ string.
By default, a name must be unique within a pattern, except that dupli-
- cate names are permitted for groups with the same number, for example:
+ cate names are permitted for subpatterns with the same number, for
+ example:
(?|(?<AA>aa)|(?<AA>bb))
The duplicate name constraint can be disabled by setting the PCRE2_DUP-
NAMES option at compile time, or by the use of (?J) within the pattern.
- Duplicate names can be useful for patterns where only one instance of
- the named capture group can match. Suppose you want to match the name
- of a weekday, either as a 3-letter abbreviation or as the full name,
- and in both cases you want to extract the abbreviation. This pattern
+ Duplicate names can be useful for patterns where only one instance of
+ the named parentheses can match. Suppose you want to match the name of
+ a weekday, either as a 3-letter abbreviation or as the full name, and
+ in both cases you want to extract the abbreviation. This pattern
(ignoring the line breaks) does the job:
(?<DN>Mon|Fri|Sun)(?:day)?|
@@ -7672,32 +7470,33 @@ NAMED CAPTURE GROUPS
(?<DN>Thu)(?:rsday)?|
(?<DN>Sat)(?:urday)?
- There are five capture groups, but only one is ever set after a match.
- The convenience functions for extracting the data by name return the
- substring for the first (and in this example, the only) group of that
- name that matched. This saves searching to find which numbered group it
- was. (An alternative way of solving this problem is to use a "branch
- reset" group, as described in the previous section.)
+ There are five capturing subpatterns, but only one is ever set after a
+ match. The convenience functions for extracting the data by name
+ return the substring for the first (and in this example, the only)
+ subpattern of that name that matched. This saves searching to find
+ which numbered subpattern it was. (An alternative way of solving this
+ problem is to use a "branch reset" subpattern, as described in the pre-
+ vious section.)
- If you make a backreference to a non-unique named group from elsewhere
- in the pattern, the groups to which the name refers are checked in the
- order in which they appear in the overall pattern. The first one that
- is set is used for the reference. For example, this pattern matches
- both "foofoo" and "barbar" but not "foobar" or "barfoo":
+ If you make a backreference to a non-unique named subpattern from else-
+ where in the pattern, the subpatterns to which the name refers are
+ checked in the order in which they appear in the overall pattern. The
+ first one that is set is used for the reference. For example, this pat-
+ tern matches both "foofoo" and "barbar" but not "foobar" or "barfoo":
(?:(?<n>foo)|(?<n>bar))\k<n>
- If you make a subroutine call to a non-unique named group, the one that
- corresponds to the first occurrence of the name is used. In the absence
- of duplicate numbers this is the one with the lowest number.
+ If you make a subroutine call to a non-unique named subpattern, the one
+ that corresponds to the first occurrence of the name is used. In the
+ absence of duplicate numbers this is the one with the lowest number.
If you use a named reference in a condition test (see the section about
- conditions below), either to check whether a capture group has matched,
- or to check for recursion, all groups with the same name are tested. If
- the condition is true for any one of them, the overall condition is
+ conditions below), either to check whether a subpattern has matched, or
+ to check for recursion, all subpatterns with the same name are tested.
+ If the condition is true for any one of them, the overall condition is
true. This is the same behaviour as testing by number. For further
- details of the interfaces for handling named capture groups, see the
+ details of the interfaces for handling named subpatterns, see the
pcre2api documentation.
@@ -7709,18 +7508,18 @@ REPETITION
a literal data character
the dot metacharacter
the \C escape sequence
- the \R escape sequence
the \X escape sequence
+ the \R escape sequence
an escape such as \d or \pL that matches a single character
a character class
a backreference
- a parenthesized group (including most assertions)
- a subroutine call (recursive or otherwise)
+ a parenthesized subpattern (including most assertions)
+ a subroutine call to a subpattern (recursive or otherwise)
The general repetition quantifier specifies a minimum and maximum num-
ber of permitted matches, by giving the two numbers in curly brackets
(braces), separated by a comma. The numbers must be less than 65536,
- and the first must be less than or equal to the second. For example,
+ and the first must be less than or equal to the second. For example:
z{2,4}
@@ -7750,34 +7549,34 @@ REPETITION
The quantifier {0} is permitted, causing the expression to behave as if
the previous item and the quantifier were not present. This may be use-
- ful for capture groups that are referenced as subroutines from else-
- where in the pattern (but see also the section entitled "Defining cap-
- ture groups for use by reference only" below). Except for parenthesized
- groups, items that have a {0} quantifier are omitted from the compiled
- pattern.
+ ful for subpatterns that are referenced as subroutines from elsewhere
+ in the pattern (but see also the section entitled "Defining subpatterns
+ for use by reference only" below). Items other than subpatterns that
+ have a {0} quantifier are omitted from the compiled pattern.
- For convenience, the three most common quantifiers have single-charac-
+ For convenience, the three most common quantifiers have single-charac-
ter abbreviations:
* is equivalent to {0,}
+ is equivalent to {1,}
? is equivalent to {0,1}
- It is possible to construct infinite loops by following a group that
- can match no characters with a quantifier that has no upper limit, for
- example:
+ It is possible to construct infinite loops by following a subpattern
+ that can match no characters with a quantifier that has no upper limit,
+ for example:
(a?)*
- Earlier versions of Perl and PCRE1 used to give an error at compile
+ Earlier versions of Perl and PCRE1 used to give an error at compile
time for such patterns. However, because there are cases where this can
be useful, such patterns are now accepted, but if any repetition of the
- group does in fact match no characters, the loop is forcibly broken.
+ subpattern does in fact match no characters, the loop is forcibly bro-
+ ken.
- By default, quantifiers are "greedy", that is, they match as much as
- possible (up to the maximum number of permitted times), without causing
- the rest of the pattern to fail. The classic example of where this
- gives problems is in trying to match comments in C programs. These
+ By default, the quantifiers are "greedy", that is, they match as much
+ as possible (up to the maximum number of permitted times), without
+ causing the rest of the pattern to fail. The classic example of where
+ this gives problems is in trying to match comments in C programs. These
appear between /* and */ and within the comment, individual * and /
characters may appear. An attempt to match C comments by applying the
pattern
@@ -7789,9 +7588,11 @@ REPETITION
/* first comment */ not comment /* second comment */
fails, because it matches the entire string owing to the greediness of
- the .* item. However, if a quantifier is followed by a question mark,
- it ceases to be greedy, and instead matches the minimum number of times
- possible, so the pattern
+ the .* item.
+
+ If a quantifier is followed by a question mark, it ceases to be greedy,
+ and instead matches the minimum number of times possible, so the pat-
+ tern
/\*.*?\*/
@@ -7811,8 +7612,8 @@ REPETITION
can be made greedy by following them with a question mark. In other
words, it inverts the default behaviour.
- When a parenthesized group is quantified with a minimum repeat count
- that is greater than 1 or with a limited maximum, more memory is
+ When a parenthesized subpattern is quantified with a minimum repeat
+ count that is greater than 1 or with a limited maximum, more memory is
required for the compiled pattern, in proportion to the size of the
minimum or maximum.
@@ -7848,15 +7649,15 @@ REPETITION
trol verbs (*PRUNE) and (*SKIP) also disable this optimization, and
there is an option, PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly.
- When a capture group is repeated, the value captured is the substring
- that matched the final iteration. For example, after
+ When a capturing subpattern is repeated, the value captured is the sub-
+ string that matched the final iteration. For example, after
(tweedle[dume]{3}\s*)+
has matched "tweedledum tweedledee" the value of the captured substring
- is "tweedledee". However, if there are nested capture groups, the cor-
- responding captured values may have been set in previous iterations.
- For example, after
+ is "tweedledee". However, if there are nested capturing subpatterns,
+ the corresponding captured values may have been set in previous itera-
+ tions. For example, after
(a|(b))+
@@ -7882,8 +7683,8 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
action of the matcher is to try again with only 5 digits matching the
\d+ item, and then with 4, and so on, before ultimately failing.
"Atomic grouping" (a term taken from Jeffrey Friedl's book) provides
- the means for specifying that once a group has matched, it is not to be
- re-evaluated in this way.
+ the means for specifying that once a subpattern has matched, it is not
+ to be re-evaluated in this way.
If we use atomic grouping for the previous example, the matcher gives
up immediately on failing to match "foo" the first time. The notation
@@ -7891,32 +7692,28 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
(?>\d+)foo
- Perl 5.28 introduced an experimental alphabetic form starting with (*
- which may be easier to remember:
-
- (*atomic:\d+)foo
-
- This kind of parenthesized group "locks up" the part of the pattern it
- contains once it has matched, and a failure further into the pattern is
- prevented from backtracking into it. Backtracking past it to previous
+ This kind of parenthesis "locks up" the part of the pattern it con-
+ tains once it has matched, and a failure further into the pattern is
+ prevented from backtracking into it. Backtracking past it to previous
items, however, works as normal.
- An alternative description is that a group of this type matches exactly
- the string of characters that an identical standalone pattern would
- match, if anchored at the current point in the subject string.
+ An alternative description is that a subpattern of this type matches
+ exactly the string of characters that an identical standalone pattern
+ would match, if anchored at the current point in the subject string.
- Atomic groups are not capture groups. Simple cases such as the above
- example can be thought of as a maximizing repeat that must swallow
- everything it can. So, while both \d+ and \d+? are prepared to adjust
- the number of digits they match in order to make the rest of the pat-
- tern match, (?>\d+) can only match an entire sequence of digits.
+ Atomic grouping subpatterns are not capturing subpatterns. Simple cases
+ such as the above example can be thought of as a maximizing repeat that
+ must swallow everything it can. So, while both \d+ and \d+? are pre-
+ pared to adjust the number of digits they match in order to make the
+ rest of the pattern match, (?>\d+) can only match an entire sequence of
+ digits.
Atomic groups in general can of course contain arbitrarily complicated
- expressions, and can be nested. However, when the contents of an atomic
- group is just a single repeated item, as in the example above, a sim-
- pler notation, called a "possessive quantifier" can be used. This con-
- sists of an additional + character following a quantifier. Using this
- notation, the previous example can be rewritten as
+ subpatterns, and can be nested. However, when the subpattern for an
+ atomic group is just a single repeated item, as in the example above, a
+ simpler notation, called a "possessive quantifier" can be used. This
+ consists of an additional + character following a quantifier. Using
+ this notation, the previous example can be rewritten as
\d++foo
@@ -7935,8 +7732,8 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
The possessive quantifier syntax is an extension to the Perl 5.8 syn-
tax. Jeffrey Friedl originated the idea (and the name) in the first
edition of his book. Mike McCloskey liked it, so implemented it when he
- built Sun's Java package, and PCRE1 copied it from there. It found its
- way into Perl at release 5.10.
+ built Sun's Java package, and PCRE1 copied it from there. It ultimately
+ found its way into Perl at release 5.10.
PCRE2 has an optimization that automatically "possessifies" certain
simple pattern constructs. For example, the sequence A+B is treated as
@@ -7944,10 +7741,10 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
when B must follow. This feature can be disabled by the PCRE2_NO_AUTO-
POSSESS option, or starting the pattern with (*NO_AUTO_POSSESS).
- When a pattern contains an unlimited repeat inside a group that can
- itself be repeated an unlimited number of times, the use of an atomic
- group is the only way to avoid some failing matches taking a very long
- time indeed. The pattern
+ When a pattern contains an unlimited repeat inside a subpattern that
+ can itself be repeated an unlimited number of times, the use of an
+ atomic group is the only way to avoid some failing matches taking a
+ very long time indeed. The pattern
(\D+|<\d+>)*[!?]
@@ -7975,28 +7772,29 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
BACKREFERENCES
Outside a character class, a backslash followed by a digit greater than
- 0 (and possibly further digits) is a backreference to a capture group
- earlier (that is, to its left) in the pattern, provided there have been
- that many previous capture groups.
+ 0 (and possibly further digits) is a backreference to a capturing sub-
+ pattern earlier (that is, to its left) in the pattern, provided there
+ have been that many previous capturing left parentheses.
However, if the decimal number following the backslash is less than 8,
it is always taken as a backreference, and causes an error only if
- there are not that many capture groups in the entire pattern. In other
- words, the group that is referenced need not be to the left of the ref-
- erence for numbers less than 8. A "forward backreference" of this type
- can make sense when a repetition is involved and the group to the right
- has participated in an earlier iteration.
-
- It is not possible to have a numerical "forward backreference" to a
- group whose number is 8 or more using this syntax because a sequence
- such as \50 is interpreted as a character defined in octal. See the
- subsection entitled "Non-printing characters" above for further details
- of the handling of digits following a backslash. Other forms of back-
- referencing do not suffer from this restriction. In particular, there
- is no problem when named capture groups are used (see below).
-
- Another way of avoiding the ambiguity inherent in the use of digits
- following a backslash is to use the \g escape sequence. This escape
+ there are not that many capturing left parentheses in the entire pat-
+ tern. In other words, the parentheses that are referenced need not be
+ to the left of the reference for numbers less than 8. A "forward back-
+ reference" of this type can make sense when a repetition is involved
+ and the subpattern to the right has participated in an earlier itera-
+ tion.
+
+ It is not possible to have a numerical "forward backreference" to a
+ subpattern whose number is 8 or more using this syntax because a
+ sequence such as \50 is interpreted as a character defined in octal.
+ See the subsection entitled "Non-printing characters" above for further
+ details of the handling of digits following a backslash. There is no
+ such problem when named parentheses are used. A backreference to any
+ subpattern is possible using named parentheses (see below).
+
+ Another way of avoiding the ambiguity inherent in the use of digits
+ following a backslash is to use the \g escape sequence. This escape
must be followed by a signed or unsigned number, optionally enclosed in
braces. These examples are all identical:
@@ -8004,85 +7802,86 @@ BACKREFERENCES
(ring), \g1
(ring), \g{1}
- An unsigned number specifies an absolute reference without the ambigu-
+ An unsigned number specifies an absolute reference without the ambigu-
ity that is present in the older syntax. It is also useful when literal
- digits follow the reference. A signed number is a relative reference.
+ digits follow the reference. A signed number is a relative reference.
Consider this example:
(abc(def)ghi)\g{-1}
- The sequence \g{-1} is a reference to the most recently started capture
- group before \g, that is, it is equivalent to \2 in this example. Simi-
- larly, \g{-2} would be equivalent to \1. The use of relative references
- can be helpful in long patterns, and also in patterns that are created
- by joining together fragments that contain references within them-
- selves.
+ The sequence \g{-1} is a reference to the most recently started captur-
+ ing subpattern before \g, that is, it is equivalent to \2 in this exam-
+ ple. Similarly, \g{-2} would be equivalent to \1. The use of relative
+ references can be helpful in long patterns, and also in patterns that
+ are created by joining together fragments that contain references
+ within themselves.
- The sequence \g{+1} is a reference to the next capture group. This kind
- of forward reference can be useful in patterns that repeat. Perl does
- not support the use of + in this way.
+ The sequence \g{+1} is a reference to the next capturing subpattern.
+ This kind of forward reference can be useful in patterns that repeat.
+ Perl does not support the use of + in this way.
- A backreference matches whatever actually most recently matched the
- capture group in the current subject string, rather than anything at
- all that matches the group (see "Groups as subroutines" below for a way
- of doing that). So the pattern
+ A backreference matches whatever actually matched the capturing subpat-
+ tern in the current subject string, rather than anything matching the
+ subpattern itself (see "Subpatterns as subroutines" below for a way of
+ doing that). So the pattern
(sens|respons)e and \1ibility
- matches "sense and sensibility" and "response and responsibility", but
- not "sense and responsibility". If caseful matching is in force at the
- time of the backreference, the case of letters is relevant. For exam-
+ matches "sense and sensibility" and "response and responsibility", but
+ not "sense and responsibility". If caseful matching is in force at the
+ time of the backreference, the case of letters is relevant. For exam-
ple,
((?i)rah)\s+\1
- matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
- original capture group is matched caselessly.
+ matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
+ original capturing subpattern is matched caselessly.
- There are several different ways of writing backreferences to named
- capture groups. The .NET syntax \k{name} and the Perl syntax \k<name>
- or \k'name' are supported, as is the Python syntax (?P=name). Perl
- 5.10's unified backreference syntax, in which \g can be used for both
- numeric and named references, is also supported. We could rewrite the
- above example in any of the following ways:
+ There are several different ways of writing backreferences to named
+ subpatterns. The .NET syntax \k{name} and the Perl syntax \k<name> or
+ \k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's
+ unified backreference syntax, in which \g can be used for both numeric
+ and named references, is also supported. We could rewrite the above
+ example in any of the following ways:
(?<p1>(?i)rah)\s+\k<p1>
(?'p1'(?i)rah)\s+\k{p1}
(?P<p1>(?i)rah)\s+(?P=p1)
(?<p1>(?i)rah)\s+\g{p1}
- A capture group that is referenced by name may appear in the pattern
+ A subpattern that is referenced by name may appear in the pattern
before or after the reference.
- There may be more than one backreference to the same group. If a group
- has not actually been used in a particular match, backreferences to it
- always fail by default. For example, the pattern
+ There may be more than one backreference to the same subpattern. If a
+ subpattern has not actually been used in a particular match, any back-
+ references to it always fail by default. For example, the pattern
(a|(bc))\2
- always fails if it starts to match "a" rather than "bc". However, if
+ always fails if it starts to match "a" rather than "bc". However, if
the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backref-
erence to an unset value matches an empty string.
- Because there may be many capture groups in a pattern, all digits fol-
- lowing a backslash are taken as part of a potential backreference num-
- ber. If the pattern continues with a digit character, some delimiter
- must be used to terminate the backreference. If the PCRE2_EXTENDED or
- PCRE2_EXTENDED_MORE option is set, this can be white space. Otherwise,
- the \g{} syntax or an empty comment (see "Comments" below) can be used.
+ Because there may be many capturing parentheses in a pattern, all dig-
+ its following a backslash are taken as part of a potential backrefer-
+ ence number. If the pattern continues with a digit character, some
+ delimiter must be used to terminate the backreference. If the
+ PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this can be white
+ space. Otherwise, the \g{} syntax or an empty comment (see "Comments"
+ below) can be used.
Recursive backreferences
- A backreference that occurs inside the group to which it refers fails
- when the group is first used, so, for example, (a\1) never matches.
- However, such references can be useful inside repeated groups. For
- example, the pattern
+ A backreference that occurs inside the parentheses to which it refers
+ fails when the subpattern is first used, so, for example, (a\1) never
+ matches. However, such references can be useful inside repeated sub-
+ patterns. For example, the pattern
(a|b\1)+
matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
- ation of the group, the backreference matches the character string cor-
- responding to the previous iteration. In order for this to work, the
+ ation of the subpattern, the backreference matches the character string
+ corresponding to the previous iteration. In order for this to work, the
pattern must be such that the first iteration does not need to match
the backreference. This can be done using alternation, as in the exam-
ple above, or by a quantifier with a minimum of zero.
@@ -8100,29 +7899,25 @@ ASSERTIONS
assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described
above.
- More complicated assertions are coded as parenthesized groups. There
- are two kinds: those that look ahead of the current position in the
- subject string, and those that look behind it, and in each case an
- assertion may be positive (must match for the assertion to be true) or
- negative (must not match for the assertion to be true). An assertion
- group is matched in the normal way, and if it is true, matching contin-
- ues after it, but with the matching position in the subject string
- reset to what it was before the assertion was processed.
-
- A lookaround assertion may also appear as the condition in a condi-
- tional group (see below). In this case, the result of matching the
- assertion determines which branch of the condition is followed.
-
- Assertion groups are not capture groups. If an assertion contains cap-
- ture groups within it, these are counted for the purposes of numbering
- the capture groups in the whole pattern. Within each branch of an
- assertion, locally captured substrings may be referenced in the usual
- way. For example, a sequence such as (.)\g{-1} can be used to check
- that two adjacent characters are the same.
+ More complicated assertions are coded as subpatterns. There are two
+ kinds: those that look ahead of the current position in the subject
+ string, and those that look behind it, and in each case an assertion
+ may be positive (must succeed for matching to continue) or negative
+ (must not succeed for matching to continue). An assertion subpattern is
+ matched in the normal way, except that, when matching continues after a
+ successful assertion, the matching position in the subject string is as
+ it was before the assertion was processed.
+
+ Assertion subpatterns are not capturing subpatterns. If an assertion
+ contains capturing subpatterns within it, these are counted for the
+ purposes of numbering the capturing subpatterns in the whole pattern.
+ Within each branch of an assertion, locally captured substrings may be
+ referenced in the usual way. For example, a sequence such as (.)\g{-1}
+ can be used to check that two adjacent characters are the same.
When a branch within an assertion fails to match, any substrings that
were captured are discarded (as happens with any pattern branch that
- fails to match). A negative assertion is true only when all its
+ fails to match). A negative assertion succeeds only when all its
branches fail to match; this means that no captured substrings are ever
retained after a successful negative assertion. When an assertion con-
tains a matching branch, what happens depends on the type of assertion.
@@ -8130,49 +7925,33 @@ ASSERTIONS
For a positive assertion, internally captured substrings in the suc-
cessful branch are retained, and matching continues with the next pat-
tern item after the assertion. For a negative assertion, a matching
- branch means that the assertion is not true. If such an assertion is
- being used as a condition in a conditional group (see below), captured
+ branch means that the assertion has failed. If the assertion is being
+ used as a condition in a conditional subpattern (see below), captured
substrings are retained, because matching continues with the "no"
branch of the condition. For other failing negative assertions, control
passes to the previous backtracking point, thus discarding any captured
strings within the assertion.
- For compatibility with Perl, most assertion groups may be repeated;
- though it makes no sense to assert the same thing several times, the
- side effect of capturing may occasionally be useful. However, an asser-
- tion that forms the condition for a conditional group may not be quan-
- tified. In practice, for other assertions, there are only three cases:
+ For compatibility with Perl, most assertion subpatterns may be
+ repeated; though it makes no sense to assert the same thing several
+ times, the side effect of capturing parentheses may occasionally be
+ useful. However, an assertion that forms the condition for a condi-
+ tional subpattern may not be quantified. In practice, for other asser-
+ tions, there are only three cases:
- (1) If the quantifier is {0}, the assertion is never obeyed during
- matching. However, it may contain internal capture groups that are
- called from elsewhere via the subroutine mechanism.
+ (1) If the quantifier is {0}, the assertion is never obeyed during
+ matching. However, it may contain internal capturing parenthesized
+ groups that are called from elsewhere via the subroutine mechanism.
- (2) If the quantifier is {0,n} where n is greater than zero, it is treated
- as if it were {0,1}. At run time, the rest of the pattern match is
+ (2) If the quantifier is {0,n} where n is greater than zero, it is treated
+ as if it were {0,1}. At run time, the rest of the pattern match is
tried with and without the assertion, the order depending on the greed-
iness of the quantifier.
- (3) If the minimum repetition is greater than zero, the quantifier is
- ignored. The assertion is obeyed just once when encountered during
+ (3) If the minimum repetition is greater than zero, the quantifier is
+ ignored. The assertion is obeyed just once when encountered during
matching.
- Alphabetic assertion names
-
- Traditionally, symbolic sequences such as (?= and (?<= have been used
- to specify lookaround assertions. Perl 5.28 introduced some experimen-
- tal alphabetic alternatives which might be easier to remember. They all
- start with (* instead of (? and must be written using lower case let-
- ters. PCRE2 supports the following synonyms:
-
- (*positive_lookahead: or (*pla: is the same as (?=
- (*negative_lookahead: or (*nla: is the same as (?!
- (*positive_lookbehind: or (*plb: is the same as (?<=
- (*negative_lookbehind: or (*nlb: is the same as (?<!
-
- For example, (*pla:foo) is the same assertion as (?=foo). In the fol-
- lowing sections, the various assertions are described using the origi-
- nal symbolic forms.
-
Lookahead assertions
Lookahead assertions start with (?= for positive assertions and (?! for
@@ -8180,38 +7959,38 @@ ASSERTIONS
\w+(?=;)
- matches a word followed by a semicolon, but does not include the semi-
+ matches a word followed by a semicolon, but does not include the semi-
colon in the match, and
foo(?!bar)
- matches any occurrence of "foo" that is not followed by "bar". Note
+ matches any occurrence of "foo" that is not followed by "bar". Note
that the apparently similar pattern
(?!foo)bar
- does not find an occurrence of "bar" that is preceded by something
- other than "foo"; it finds any occurrence of "bar" whatsoever, because
+ does not find an occurrence of "bar" that is preceded by something
+ other than "foo"; it finds any occurrence of "bar" whatsoever, because
the assertion (?!foo) is always true when the next three characters are
"bar". A lookbehind assertion is needed to achieve the other effect.
If you want to force a matching failure at some point in a pattern, the
- most convenient way to do it is with (?!) because an empty string
- always matches, so an assertion that requires there not to be an empty
+ most convenient way to do it is with (?!) because an empty string
+ always matches, so an assertion that requires there not to be an empty
string must always fail. The backtracking control verb (*FAIL) or (*F)
is a synonym for (?!).
Lookbehind assertions
- Lookbehind assertions start with (?<= for positive assertions and (?<!
+ Lookbehind assertions start with (?<= for positive assertions and (?<!
for negative assertions. For example,
(?<!foo)bar
- does find an occurrence of "bar" that is not preceded by "foo". The
- contents of a lookbehind assertion are restricted such that all the
+ does find an occurrence of "bar" that is not preceded by "foo". The
+ contents of a lookbehind assertion are restricted such that all the
strings it matches must have a fixed length. However, if there are sev-
- eral top-level alternatives, they do not all have to have the same
+ eral top-level alternatives, they do not all have to have the same
fixed length. Thus
(?<=bullock|donkey)
@@ -8220,74 +7999,74 @@ ASSERTIONS
(?<!dogs?|cats?)
- causes an error at compile time. Branches that match different length
- strings are permitted only at the top level of a lookbehind assertion.
+ causes an error at compile time. Branches that match different length
+ strings are permitted only at the top level of a lookbehind assertion.
This is an extension compared with Perl, which requires all branches to
match the same length of string. An assertion such as
(?<=ab(c|de))
- is not permitted, because its single top-level branch can match two
- different lengths, but it is acceptable to PCRE2 if rewritten to use
+ is not permitted, because its single top-level branch can match two
+ different lengths, but it is acceptable to PCRE2 if rewritten to use
two top-level branches:
(?<=abc|abde)
- In some cases, the escape sequence \K (see above) can be used instead
+ In some cases, the escape sequence \K (see above) can be used instead
of a lookbehind assertion to get round the fixed-length restriction.
- The implementation of lookbehind assertions is, for each alternative,
- to temporarily move the current position back by the fixed length and
+ The implementation of lookbehind assertions is, for each alternative,
+ to temporarily move the current position back by the fixed length and
then try to match. If there are insufficient characters before the cur-
rent position, the assertion fails.
- In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which
- matches a single code unit even in a UTF mode) to appear in lookbehind
- assertions, because it makes it impossible to calculate the length of
- the lookbehind. The \X and \R escapes, which can match different num-
+ In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which
+ matches a single code unit even in a UTF mode) to appear in lookbehind
+ assertions, because it makes it impossible to calculate the length of
+ the lookbehind. The \X and \R escapes, which can match different num-
bers of code units, are never permitted in lookbehinds.
- "Subroutine" calls (see below) such as (?2) or (?&X) are permitted in
- lookbehinds, as long as the called capture group matches a fixed-length
- string. However, recursion, that is, a "subroutine" call into a group
- that is already active, is not supported.
+ "Subroutine" calls (see below) such as (?2) or (?&X) are permitted in
+ lookbehinds, as long as the subpattern matches a fixed-length string.
+ However, recursion, that is, a "subroutine" call into a group that is
+ already active, is not supported.
Perl does not support backreferences in lookbehinds. PCRE2 does support
- them, but only if certain conditions are met. The
- PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no use
- of (?| in the pattern (it creates duplicate group numbers), and if the
- backreference is by name, the name must be unique. Of course, the ref-
- erenced group must itself match a fixed length substring. The following
+ them, but only if certain conditions are met. The
+ PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no use
+ of (?| in the pattern (it creates duplicate subpattern numbers), and if
+ the backreference is by name, the name must be unique. Of course, the
+ referenced subpattern must itself be of fixed length. The following
pattern matches words containing at least two characters that begin and
end with the same character:
\b(\w)\w++(?<=\1)
- Possessive quantifiers can be used in conjunction with lookbehind
+ Possessive quantifiers can be used in conjunction with lookbehind
assertions to specify efficient matching of fixed-length strings at the
end of subject strings. Consider a simple pattern such as
abcd$
- when applied to a long string that does not match. Because matching
- proceeds from left to right, PCRE2 will look for each "a" in the sub-
- ject and then see if what follows matches the rest of the pattern. If
+ when applied to a long string that does not match. Because matching
+ proceeds from left to right, PCRE2 will look for each "a" in the sub-
+ ject and then see if what follows matches the rest of the pattern. If
the pattern is specified as
^.*abcd$
- the initial .* matches the entire string at first, but when this fails
+ the initial .* matches the entire string at first, but when this fails
(because there is no following "a"), it backtracks to match all but the
- last character, then all but the last two characters, and so on. Once
- again the search for "a" covers the entire string, from right to left,
+ last character, then all but the last two characters, and so on. Once
+ again the search for "a" covers the entire string, from right to left,
so we are no better off. However, if the pattern is written as
^.*+(?<=abcd)
there can be no backtracking for the .*+ item because of the possessive
quantifier; it can match only the entire string. The subsequent lookbe-
- hind assertion does a single test on the last four characters. If it
- fails, the match fails immediately. For long strings, this approach
+ hind assertion does a single test on the last four characters. If it
+ fails, the match fails immediately. For long strings, this approach
makes a significant difference to the processing time.
Using multiple assertions
@@ -8296,18 +8075,18 @@ ASSERTIONS
(?<=\d{3})(?<!999)foo
- matches "foo" preceded by three digits that are not "999". Notice that
- each of the assertions is applied independently at the same point in
- the subject string. First there is a check that the previous three
- characters are all digits, and then there is a check that the same
+ matches "foo" preceded by three digits that are not "999". Notice that
+ each of the assertions is applied independently at the same point in
+ the subject string. First there is a check that the previous three
+ characters are all digits, and then there is a check that the same
three characters are not "999". This pattern does not match "foo" pre-
- ceded by six characters, the first three of which are digits and the last
- three of which are not "999". For example, it doesn't match "123abc-
+ ceded by six characters, the first three of which are digits and the last
+ three of which are not "999". For example, it doesn't match "123abc-
foo". A pattern to do that is
(?<=\d{3}...)(?<!999)foo
- This time the first assertion looks at the preceding six characters,
+ This time the first assertion looks at the preceding six characters,
checking that the first three are digits, and then the second assertion
checks that the preceding three characters are not "999".
@@ -8315,77 +8094,22 @@ ASSERTIONS
(?<=(?<!foo)bar)baz
- matches an occurrence of "baz" that is preceded by "bar" which in turn
+ matches an occurrence of "baz" that is preceded by "bar" which in turn
is not preceded by "foo", while
(?<=\d{3}(?!999)...)foo
- is another pattern that matches "foo" preceded by three digits and any
+ is another pattern that matches "foo" preceded by three digits and any
three characters that are not "999".
-SCRIPT RUNS
-
- In concept, a script run is a sequence of characters that are all from
- the same Unicode script such as Latin or Greek. However, because some
- scripts are commonly used together, and because some diacritical and
- other marks are used with multiple scripts, it is not that simple.
- There is a full description of the rules that PCRE2 uses in the section
- entitled "Script Runs" in the pcre2unicode documentation.
-
- If part of a pattern is enclosed between (*script_run: or (*sr: and a
- closing parenthesis, it fails if the sequence of characters that it
- matches are not a script run. After a failure, normal backtracking
- occurs. Script runs can be used to detect spoofing attacks using char-
- acters that look the same, but are from different scripts. The string
- "paypal.com" is an infamous example, where the letters could be a mix-
- ture of Latin and Cyrillic. This pattern ensures that the matched char-
- acters in a sequence of non-spaces that follow white space are a script
- run:
-
- \s+(*sr:\S+)
-
- To be sure that they are all from the Latin script (for example), a
- lookahead can be used:
-
- \s+(?=\p{Latin})(*sr:\S+)
-
- This works as long as the first character is expected to be a character
- in that script, and not (for example) punctuation, which is allowed
- with any script. If this is not the case, a more creative lookahead is
- needed. For example, if digits, underscore, and dots are permitted at
- the start:
-
- \s+(?=[0-9_.]*\p{Latin})(*sr:\S+)
-
-
- In many cases, backtracking into a script run pattern fragment is not
- desirable. The script run can employ an atomic group to prevent this.
- Because this is a common requirement, a shorthand notation is provided
- by (*atomic_script_run: or (*asr:
-
- (*asr:...) is the same as (*sr:(?>...))
-
- Note that the atomic group is inside the script run. Putting it outside
- would not prevent backtracking into the script run pattern.
+CONDITIONAL SUBPATTERNS
- Support for script runs is not available if PCRE2 is compiled without
- Unicode support. A compile-time error is given if any of the above con-
- structs is encountered. Script runs are not supported by the alternate
- matching function, pcre2_dfa_match() because they use the same mecha-
- nism as capturing parentheses.
-
- Warning: The (*ACCEPT) control verb (see below) should not be used
- within a script run group, because it causes an immediate exit from the
- group, bypassing the script run checking.
-
-
-CONDITIONAL GROUPS
-
- It is possible to cause the matching process to obey a pattern fragment
- conditionally or to choose between two alternative fragments, depending
- on the result of an assertion, or whether a specific capture group has
- already been matched. The two possible forms of conditional group are:
+ It is possible to cause the matching process to obey a subpattern con-
+ ditionally or to choose between two alternative subpatterns, depending
+ on the result of an assertion, or whether a specific capturing subpat-
+ tern has already been matched. The two possible forms of conditional
+ subpattern are:
(?(condition)yes-pattern)
(?(condition)yes-pattern|no-pattern)
@@ -8393,33 +8117,33 @@ CONDITIONAL GROUPS
If the condition is satisfied, the yes-pattern is used; otherwise the
no-pattern (if present) is used. An absent no-pattern is equivalent to
an empty string (it always matches). If there are more than two alter-
- natives in the group, a compile-time error occurs. Each of the two
- alternatives may itself contain nested groups of any form, including
- conditional groups; the restriction to two alternatives applies only at
- the level of the condition itself. This pattern fragment is an example
- where the alternatives are complex:
+ natives in the subpattern, a compile-time error occurs. Each of the two
+ alternatives may itself contain nested subpatterns of any form, includ-
+ ing conditional subpatterns; the restriction to two alternatives
+ applies only at the level of the condition. This pattern fragment is an
+ example where the alternatives are complex:
(?(1) (A|B|C) | (D | (?(2)E|F) | E) )
- There are five kinds of condition: references to capture groups, refer-
+ There are five kinds of condition: references to subpatterns, refer-
ences to recursion, two pseudo-conditions called DEFINE and VERSION,
and assertions.
- Checking for a used capture group by number
+ Checking for a used subpattern by number
If the text between the parentheses consists of a sequence of digits,
- the condition is true if a capture group of that number has previously
- matched. If there is more than one capture group with the same number
- (see the earlier section about duplicate group numbers), the condition
- is true if any of them have matched. An alternative notation is to pre-
- cede the digits with a plus or minus sign. In this case, the group num-
- ber is relative rather than absolute. The most recently opened capture
- group can be referenced by (?(-1), the next most recent by (?(-2), and
- so on. Inside loops it can also make sense to refer to subsequent
- groups. The next capture group can be referenced as (?(+1), and so on.
- (The value zero in any of these forms is not used; it provokes a com-
- pile-time error.)
+ the condition is true if a capturing subpattern of that number has pre-
+ viously matched. If there is more than one capturing subpattern with
+ the same number (see the earlier section about duplicate subpattern
+ numbers), the condition is true if any of them have matched. An alter-
+ native notation is to precede the digits with a plus or minus sign. In
+ this case, the subpattern number is relative rather than absolute. The
+ most recently opened parentheses can be referenced by (?(-1), the next
+ most recent by (?(-2), and so on. Inside loops it can also make sense
+ to refer to subsequent groups. The next parentheses to be opened can be
+ referenced as (?(+1), and so on. (The value zero in any of these forms
+ is not used; it provokes a compile-time error.)
Consider the following pattern, which contains non-significant white
space to make it more readable (assume the PCRE2_EXTENDED option) and
@@ -8430,13 +8154,13 @@ CONDITIONAL GROUPS
The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The sec-
ond part matches one or more characters that are not parentheses. The
- third part is a conditional group that tests whether or not the first
- capture group matched. If it did, that is, if the subject started with an
- opening parenthesis, the condition is true, and so the yes-pattern is
- executed and a closing parenthesis is required. Otherwise, since no-
- pattern is not present, the conditional group matches nothing. In other
- words, this pattern matches a sequence of non-parentheses, optionally
- enclosed in parentheses.
+ third part is a conditional subpattern that tests whether or not the
+ first set of parentheses matched. If they did, that is, if the subject
+ started with an opening parenthesis, the condition is true, and so the
+ yes-pattern is executed and a closing parenthesis is required. Other-
+ wise, since no-pattern is not present, the subpattern matches nothing.
+ In other words, this pattern matches a sequence of non-parentheses,
+ optionally enclosed in parentheses.
If you were embedding this pattern in a larger one, you could use a
relative reference:
@@ -8446,104 +8170,104 @@ CONDITIONAL GROUPS
This makes the fragment independent of the parentheses in the larger
pattern.
- Checking for a used capture group by name
+ Checking for a used subpattern by name
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
- used capture group by name. For compatibility with earlier versions of
+ used subpattern by name. For compatibility with earlier versions of
PCRE1, which had this facility before Perl, the syntax (?(name)...) is
- also recognized. Note, however, that undelimited names consisting of
+ also recognized. Note, however, that undelimited names consisting of
the letter R followed by digits are ambiguous (see the following sec-
- tion). Rewriting the above example to use a named group gives this:
+ tion).
+
+ Rewriting the above example to use a named subpattern gives this:
(?<OPEN> \( )? [^()]+ (?(<OPEN>) \) )
If the name used in a condition of this kind is a duplicate, the test
- is applied to all groups of the same name, and is true if any one of
- them has matched.
+ is applied to all subpatterns of the same name, and is true if any one
+ of them has matched.
Checking for pattern recursion
"Recursion" in this sense refers to any subroutine-like call from one
part of the pattern to another, whether or not it is actually recur-
- sive. See the sections entitled "Recursive patterns" and "Groups as
- subroutines" below for details of recursion and subroutine calls.
-
- If a condition is the string (R), and there is no capture group with
- the name R, the condition is true if matching is currently in a recur-
- sion or subroutine call to the whole pattern or any capture group. If
- digits follow the letter R, and there is no group with that name, the
- condition is true if the most recent call is into a group with the
+ sive. See the sections entitled "Recursive patterns" and "Subpatterns
+ as subroutines" below for details of recursion and subpattern calls.
+
+ If a condition is the string (R), and there is no subpattern with the
+ name R, the condition is true if matching is currently in a recursion
+ or subroutine call to the whole pattern or any subpattern. If digits
+ follow the letter R, and there is no subpattern with that name, the
+ condition is true if the most recent call is into a subpattern with the
given number, which must exist somewhere in the overall pattern. This
is a contrived example that is equivalent to a+b:
((?(R1)a+|(?1)b))
- However, in both cases, if there is a capture group with a matching
- name, the condition tests for its being set, as described in the sec-
- tion above, instead of testing for recursion. For example, creating a
- group with the name R1 by adding (?<R1>) to the above pattern com-
- pletely changes its meaning.
+ However, in both cases, if there is a subpattern with a matching name,
+ the condition tests for its being set, as described in the section
+ above, instead of testing for recursion. For example, creating a group
+ with the name R1 by adding (?<R1>) to the above pattern completely
+ changes its meaning.
If a name preceded by ampersand follows the letter R, for example:
(?(R&name)...)
- the condition is true if the most recent recursion is into a group of
- that name (which must exist within the pattern).
+ the condition is true if the most recent recursion is into a subpattern
+ of that name (which must exist within the pattern).
This condition does not check the entire recursion stack. It tests only
the current level. If the name used in a condition of this kind is a
- duplicate, the test is applied to all groups of the same name, and is
- true if any one of them is the most recent recursion.
+ duplicate, the test is applied to all subpatterns of the same name, and
+ is true if any one of them is the most recent recursion.
At "top level", all these recursion test conditions are false.
- Defining capture groups for use by reference only
+ Defining subpatterns for use by reference only
If the condition is the string (DEFINE), the condition is always false,
even if there is a group with the name DEFINE. In this case, there may
- be only one alternative in the rest of the conditional group. It is
- always skipped if control reaches this point in the pattern; the idea
- of DEFINE is that it can be used to define subroutines that can be ref-
- erenced from elsewhere. (The use of subroutines is described below.)
- For example, a pattern to match an IPv4 address such as
- "192.168.23.245" could be written like this (ignore white space and
- line breaks):
+ be only one alternative in the subpattern. It is always skipped if con-
+ trol reaches this point in the pattern; the idea of DEFINE is that it
+ can be used to define subroutines that can be referenced from else-
+ where. (The use of subroutines is described below.) For example, a pat-
+ tern to match an IPv4 address such as "192.168.23.245" could be written
+ like this (ignore white space and line breaks):
(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
\b (?&byte) (\.(?&byte)){3} \b
- The first part of the pattern is a DEFINE group inside which another
- group named "byte" is defined. This matches an individual component of
- an IPv4 address (a number less than 256). When matching takes place,
- this part of the pattern is skipped because DEFINE acts like a false
- condition. The rest of the pattern uses references to the named group
- to match the four dot-separated components of an IPv4 address, insist-
+ The first part of the pattern is a DEFINE group inside which another
+ group named "byte" is defined. This matches an individual component of
+ an IPv4 address (a number less than 256). When matching takes place,
+ this part of the pattern is skipped because DEFINE acts like a false
+ condition. The rest of the pattern uses references to the named group
+ to match the four dot-separated components of an IPv4 address, insist-
ing on a word boundary at each end.
Checking the PCRE2 version
- Programs that link with a PCRE2 library can check the version by call-
- ing pcre2_config() with appropriate arguments. Users of applications
- that do not have access to the underlying code cannot do this. A spe-
- cial "condition" called VERSION exists to allow such users to discover
+ Programs that link with a PCRE2 library can check the version by call-
+ ing pcre2_config() with appropriate arguments. Users of applications
+ that do not have access to the underlying code cannot do this. A spe-
+ cial "condition" called VERSION exists to allow such users to discover
which version of PCRE2 they are dealing with by using this condition to
- match a string such as "yesno". VERSION must be followed either by "="
+ match a string such as "yesno". VERSION must be followed either by "="
or ">=" and a version number. For example:
(?(VERSION>=10.4)yes|no)
- This pattern matches "yes" if the PCRE2 version is greater than or equal to
- 10.4, or "no" otherwise. The fractional part of the version number may
+ This pattern matches "yes" if the PCRE2 version is greater than or equal to
+ 10.4, or "no" otherwise. The fractional part of the version number may
not contain more than two digits.
Assertion conditions
- If the condition is not in any of the above formats, it must be a
- parenthesized assertion. This may be a positive or negative lookahead
- or lookbehind assertion. Consider this pattern, again containing non-
- significant white space, and with the two alternatives on the second
- line:
+ If the condition is not in any of the above formats, it must be an
+ assertion. This may be a positive or negative lookahead or lookbehind
+ assertion. Consider this pattern, again containing non-significant
+ white space, and with the two alternatives on the second line:
(?(?=[^a-z]*[a-z])
\d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )
@@ -8556,12 +8280,12 @@ CONDITIONAL GROUPS
strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
letters and dd are digits.
- When an assertion that is a condition contains capture groups, any cap-
- turing that occurs in a matching branch is retained afterwards, for
- both positive and negative assertions, because matching always contin-
- ues after the assertion, whether it succeeds or fails. (Compare non-
- conditional assertions, for which captures are retained only for posi-
- tive assertions that succeed.)
+ When an assertion that is a condition contains capturing subpatterns,
+ any capturing that occurs in a matching branch is retained afterwards,
+ for both positive and negative assertions, because matching always con-
+ tinues after the assertion, whether it succeeds or fails. (Compare non-
+ conditional assertions, when captures are retained only for positive
+ assertions that succeed.)
COMMENTS
@@ -8569,8 +8293,8 @@ COMMENTS
There are two ways of including comments in patterns that are processed
by PCRE2. In both cases, the start of the comment must not be in a
character class, nor in the middle of any other sequence of related
- characters such as (?: or a group name or number. The characters that
- make up a comment play no part in the pattern matching.
+ characters such as (?: or a subpattern name or number. The characters
+ that make up a comment play no part in the pattern matching.
The sequence (?# marks the start of a comment that continues up to the
next closing parenthesis. Nested parentheses are not permitted. If the
@@ -8615,14 +8339,14 @@ RECURSIVE PATTERNS
Obviously, PCRE2 cannot support the interpolation of Perl code.
Instead, it supports special syntax for recursion of the entire pat-
- tern, and also for individual capture group recursion. After its intro-
- duction in PCRE1 and Python, this kind of recursion was subsequently
+ tern, and also for individual subpattern recursion. After its introduc-
+ tion in PCRE1 and Python, this kind of recursion was subsequently
introduced into Perl at release 5.10.
A special item that consists of (? followed by a number greater than
zero and a closing parenthesis is a recursive subroutine call of the
- capture group of the given number, provided that it occurs inside that
- group. (If not, it is a non-recursive subroutine call, which is
+ subpattern of the given number, provided that it occurs inside that
+ subpattern. (If not, it is a non-recursive subroutine call, which is
described in the next section.) The special item (?R) or (?0) is a
recursive call of the entire regular expression.
@@ -8653,136 +8377,137 @@ RECURSIVE PATTERNS
words, a negative number counts capturing parentheses leftwards from
the point at which it is encountered.
- Be aware however, that if duplicate capture group numbers are in use,
- relative references refer to the earliest group with the appropriate
+ Be aware however, that if duplicate subpattern numbers are in use, rel-
+ ative references refer to the earliest subpattern with the appropriate
number. Consider, for example:
(?|(a)|(b)) (c) (?-2)
- The first two capture groups (a) and (b) are both numbered 1, and group
- (c) is number 2. When the reference (?-2) is encountered, the second
- most recently opened parentheses has the number 1, but it is the first
- such group (the (a) group) to which the recursion refers. This would be
- the same if an absolute reference (?1) was used. In other words, rela-
- tive references are just a shorthand for computing a group number.
-
- It is also possible to refer to subsequent capture groups, by writing
- references such as (?+2). However, these cannot be recursive because
- the reference is not inside the parentheses that are referenced. They
- are always non-recursive subroutine calls, as described in the next
- section.
-
- An alternative approach is to use named parentheses. The Perl syntax
- for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup-
+ The first two capturing groups (a) and (b) are both numbered 1, and
+ group (c) is number 2. When the reference (?-2) is encountered, the
+ second most recently opened parentheses has the number 1, but it is the
+ first such group (the (a) group) to which the recursion refers. This
+ would be the same if an absolute reference (?1) was used. In other
+ words, relative references are just a shorthand for computing a group
+ number.
+
+ It is also possible to refer to subsequently opened parentheses, by
+ writing references such as (?+2). However, these cannot be recursive
+ because the reference is not inside the parentheses that are refer-
+ enced. They are always non-recursive subroutine calls, as described in
+ the next section.
+
+ An alternative approach is to use named parentheses. The Perl syntax
+ for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup-
ported. We could rewrite the above example as follows:
(?<pn> \( ( [^()]++ | (?&pn) )* \) )
- If there is more than one group with the same name, the earliest one is
- used.
+ If there is more than one subpattern with the same name, the earliest
+ one is used.
The example pattern that we have been looking at contains nested unlim-
- ited repeats, and so the use of a possessive quantifier for matching
- strings of non-parentheses is important when applying the pattern to
+ ited repeats, and so the use of a possessive quantifier for matching
+ strings of non-parentheses is important when applying the pattern to
strings that do not match. For example, when this pattern is applied to
(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
- it yields "no match" quickly. However, if a possessive quantifier is
- not used, the match runs for a very long time indeed because there are
- so many different ways the + and * repeats can carve up the subject,
+ it yields "no match" quickly. However, if a possessive quantifier is
+ not used, the match runs for a very long time indeed because there are
+ so many different ways the + and * repeats can carve up the subject,
and all have to be tested before failure can be reported.
- At the end of a match, the values of capturing parentheses are those
- from the outermost level. If you want to obtain intermediate values, a
+ At the end of a match, the values of capturing parentheses are those
+ from the outermost level. If you want to obtain intermediate values, a
callout function can be used (see below and the pcre2callout documenta-
tion). If the pattern above is matched against
(ab(cd)ef)
- the value for the inner capturing parentheses (numbered 2) is "ef",
- which is the last value taken on at the top level. If a capture group
- is not matched at the top level, its final captured value is unset,
- even if it was (temporarily) set at a deeper level during the matching
- process.
+ the value for the inner capturing parentheses (numbered 2) is "ef",
+ which is the last value taken on at the top level. If a capturing sub-
+ pattern is not matched at the top level, its final captured value is
+ unset, even if it was (temporarily) set at a deeper level during the
+ matching process.
- Do not confuse the (?R) item with the condition (R), which tests for
- recursion. Consider this pattern, which matches text in angle brack-
- ets, allowing for arbitrary nesting. Only digits are allowed in nested
- brackets (that is, when recursing), whereas any characters are permit-
+ Do not confuse the (?R) item with the condition (R), which tests for
+ recursion. Consider this pattern, which matches text in angle brack-
+ ets, allowing for arbitrary nesting. Only digits are allowed in nested
+ brackets (that is, when recursing), whereas any characters are permit-
ted at the outer level.
< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >
- In this pattern, (?(R) is the start of a conditional group, with two
- different alternatives for the recursive and non-recursive cases. The
- (?R) item is the actual recursive call.
+ In this pattern, (?(R) is the start of a conditional subpattern, with
+ two different alternatives for the recursive and non-recursive cases.
+ The (?R) item is the actual recursive call.
Differences in recursion processing between PCRE2 and Perl
Some former differences between PCRE2 and Perl no longer exist.
- Before release 10.30, recursion processing in PCRE2 differed from Perl
- in that a recursive subroutine call was always treated as an atomic
- group. That is, once it had matched some of the subject string, it was
- never re-entered, even if it contained untried alternatives and there
- was a subsequent matching failure. (Historical note: PCRE implemented
+ Before release 10.30, recursion processing in PCRE2 differed from Perl
+ in that a recursive subpattern call was always treated as an atomic
+ group. That is, once it had matched some of the subject string, it was
+ never re-entered, even if it contained untried alternatives and there
+ was a subsequent matching failure. (Historical note: PCRE implemented
recursion before Perl did.)
- Starting with release 10.30, recursive subroutine calls are no longer
+ Starting with release 10.30, recursive subroutine calls are no longer
treated as atomic. That is, they can be re-entered to try unused alter-
- natives if there is a matching failure later in the pattern. This is
- now compatible with the way Perl works. If you want a subroutine call
+ natives if there is a matching failure later in the pattern. This is
+ now compatible with the way Perl works. If you want a subroutine call
to be atomic, you must explicitly enclose it in an atomic group.
- Supporting backtracking into recursions simplifies certain types of
+ Supporting backtracking into recursions simplifies certain types of
recursive pattern. For example, this pattern matches palindromic
strings:
^((.)(?1)\2|.?)$
- The second branch in the group matches a single central character in
- the palindrome when there are an odd number of characters, or nothing
- when there are an even number of characters, but in order to work it
- has to be able to try the second case when the rest of the pattern
+ The second branch in the group matches a single central character in
+ the palindrome when there are an odd number of characters, or nothing
+ when there are an even number of characters, but in order to work it
+ has to be able to try the second case when the rest of the pattern
match fails. If you want to match typical palindromic phrases, the pat-
- tern has to ignore all non-word characters, which can be done like
+ tern has to ignore all non-word characters, which can be done like
this:
^\W*+((.)\W*+(?1)\W*+\2|\W*+.?)\W*+$
- If run with the PCRE2_CASELESS option, this pattern matches phrases
- such as "A man, a plan, a canal: Panama!". Note the use of the posses-
- sive quantifier *+ to avoid backtracking into sequences of non-word
+ If run with the PCRE2_CASELESS option, this pattern matches phrases
+ such as "A man, a plan, a canal: Panama!". Note the use of the posses-
+ sive quantifier *+ to avoid backtracking into sequences of non-word
characters. Without this, PCRE2 takes a great deal longer (ten times or
- more) to match typical phrases, and Perl takes so long that you think
+ more) to match typical phrases, and Perl takes so long that you think
it has gone into a loop.
- Another way in which PCRE2 and Perl used to differ in their recursion
- processing is in the handling of captured values. Formerly in Perl,
- when a group was called recursively or as a subroutine (see the next
- section), it had no access to any values that were captured outside the
- recursion, whereas in PCRE2 these values can be referenced. Consider
- this pattern:
+ Another way in which PCRE2 and Perl used to differ in their recursion
+ processing is in the handling of captured values. Formerly in Perl,
+ when a subpattern was called recursively or as a subroutine (see the
+ next section), it had no access to any values that were captured out-
+ side the recursion, whereas in PCRE2 these values can be referenced.
+ Consider this pattern:
^(.)(\1|a(?2))
- This pattern matches "bab". The first capturing parentheses match "b",
+ This pattern matches "bab". The first capturing parentheses match "b",
then in the second group, when the backreference \1 fails to match "b",
the second alternative matches "a" and then recurses. In the recursion,
- \1 does now match "b" and so the whole match succeeds. This match used
+ \1 does now match "b" and so the whole match succeeds. This match used
to fail in Perl, but in later versions (I tried 5.024) it now works.
-GROUPS AS SUBROUTINES
+SUBPATTERNS AS SUBROUTINES
- If the syntax for a recursive group call (either by number or by name)
- is used outside the parentheses to which it refers, it operates a bit
- like a subroutine in a programming language. More accurately, PCRE2
- treats the referenced group as an independent subpattern which it tries
- to match at the current matching position. The called group may be
- defined before or after the reference. A numbered reference can be
- absolute or relative, as in these examples:
+ If the syntax for a recursive subpattern call (either by number or by
+ name) is used outside the parentheses to which it refers, it operates a
+ bit like a subroutine in a programming language. More accurately, PCRE2
+ treats the referenced subpattern as an independent subpattern which it
+ tries to match at the current matching position. The called subpattern
+ may be defined before or after the reference. A numbered reference can
+ be absolute or relative, as in these examples:
(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
@@ -8792,106 +8517,106 @@ GROUPS AS SUBROUTINES
(sens|respons)e and \1ibility
- matches "sense and sensibility" and "response and responsibility", but
+ matches "sense and sensibility" and "response and responsibility", but
not "sense and responsibility". If instead the pattern
(sens|respons)e and (?1)ibility
- is used, it does match "sense and responsibility" as well as the other
- two strings. Another example is given in the discussion of DEFINE
+ is used, it does match "sense and responsibility" as well as the other
+ two strings. Another example is given in the discussion of DEFINE
above.
- Like recursions, subroutine calls used to be treated as atomic, but
- this changed at PCRE2 release 10.30, so backtracking into subroutine
- calls can now occur. However, any capturing parentheses that are set
+ Like recursions, subroutine calls used to be treated as atomic, but
+ this changed at PCRE2 release 10.30, so backtracking into subroutine
+ calls can now occur. However, any capturing parentheses that are set
during the subroutine call revert to their previous values afterwards.
- Processing options such as case-independence are fixed when a group is
- defined, so if it is used as a subroutine, such options cannot be
- changed for different calls. For example, consider this pattern:
+ Processing options such as case-independence are fixed when a subpat-
+ tern is defined, so if it is used as a subroutine, such options cannot
+ be changed for different calls. For example, consider this pattern:
(abc)(?i:(?-1))
- It matches "abcabc". It does not match "abcABC" because the change of
- processing option does not affect the called group.
+ It matches "abcabc". It does not match "abcABC" because the change of
+ processing option does not affect the called subpattern.
- The behaviour of backtracking control verbs in groups when called as
- subroutines is described in the section entitled "Backtracking verbs in
- subroutines" below.
+ The behaviour of backtracking control verbs in subpatterns when called
+ as subroutines is described in the section entitled "Backtracking verbs
+ in subroutines" below.
ONIGURUMA SUBROUTINE SYNTAX
- For compatibility with Oniguruma, the non-Perl syntax \g followed by a
+ For compatibility with Oniguruma, the non-Perl syntax \g followed by a
name or a number enclosed either in angle brackets or single quotes, is
- an alternative syntax for calling a group as a subroutine, possibly
- recursively. Here are two of the examples used above, rewritten using
- this syntax:
+ an alternative syntax for referencing a subpattern as a subroutine,
+ possibly recursively. Here are two of the examples used above, rewrit-
+ ten using this syntax:
(?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
(sens|respons)e and \g'1'ibility
- PCRE2 supports an extension to Oniguruma: if a number is preceded by a
+ PCRE2 supports an extension to Oniguruma: if a number is preceded by a
plus or a minus sign it is taken as a relative reference. For example:
(abc)(?i:\g<-1>)
- Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
- synonymous. The former is a backreference; the latter is a subroutine
+ Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
+ synonymous. The former is a backreference; the latter is a subroutine
call.
CALLOUTS
Perl has a feature whereby using the sequence (?{...}) causes arbitrary
- Perl code to be obeyed in the middle of matching a regular expression.
+ Perl code to be obeyed in the middle of matching a regular expression.
This makes it possible, amongst other things, to extract different sub-
strings that match the same pair of parentheses when there is a repeti-
tion.
- PCRE2 provides a similar feature, but of course it cannot obey arbi-
- trary Perl code. The feature is called "callout". The caller of PCRE2
- provides an external function by putting its entry point in a match
- context using the function pcre2_set_callout(), and then passing that
- context to pcre2_match() or pcre2_dfa_match(). If no match context is
+ PCRE2 provides a similar feature, but of course it cannot obey arbi-
+ trary Perl code. The feature is called "callout". The caller of PCRE2
+ provides an external function by putting its entry point in a match
+ context using the function pcre2_set_callout(), and then passing that
+ context to pcre2_match() or pcre2_dfa_match(). If no match context is
passed, or if the callout entry point is set to NULL, callouts are dis-
abled.
- Within a regular expression, (?C<arg>) indicates a point at which the
- external function is to be called. There are two kinds of callout:
- those with a numerical argument and those with a string argument. (?C)
- on its own with no argument is treated as (?C0). A numerical argument
- allows the application to distinguish between different callouts.
- String arguments were added for release 10.20 to make it possible for
- script languages that use PCRE2 to embed short scripts within patterns
+ Within a regular expression, (?C<arg>) indicates a point at which the
+ external function is to be called. There are two kinds of callout:
+ those with a numerical argument and those with a string argument. (?C)
+ on its own with no argument is treated as (?C0). A numerical argument
+ allows the application to distinguish between different callouts.
+ String arguments were added for release 10.20 to make it possible for
+ script languages that use PCRE2 to embed short scripts within patterns
in a similar way to Perl.
During matching, when PCRE2 reaches a callout point, the external func-
- tion is called. It is provided with the number or string argument of
- the callout, the position in the pattern, and one item of data that is
+ tion is called. It is provided with the number or string argument of
+ the callout, the position in the pattern, and one item of data that is
also set in the match block. The callout function may cause matching to
proceed, to backtrack, or to fail.
- By default, PCRE2 implements a number of optimizations at matching
- time, and one side-effect is that sometimes callouts are skipped. If
- you need all possible callouts to happen, you need to set options that
- disable the relevant optimizations. More details, including a complete
- description of the programming interface to the callout function, are
+ By default, PCRE2 implements a number of optimizations at matching
+ time, and one side-effect is that sometimes callouts are skipped. If
+ you need all possible callouts to happen, you need to set options that
+ disable the relevant optimizations. More details, including a complete
+ description of the programming interface to the callout function, are
given in the pcre2callout documentation.
Callouts with numerical arguments
- If you just want to have a means of identifying different callout
- points, put a number less than 256 after the letter C. For example,
+ If you just want to have a means of identifying different callout
+ points, put a number less than 256 after the letter C. For example,
this pattern has two callout points:
(?C1)abc(?C2)def
- If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical
- callouts are automatically installed before each item in the pattern.
- They are all numbered 255. If there is a conditional group in the pat-
+ If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical
+ callouts are automatically installed before each item in the pattern.
+ They are all numbered 255. If there is a conditional group in the pat-
tern whose condition is an assertion, an additional callout is inserted
- just before the condition. An explicit callout may also be set at this
+ just before the condition. An explicit callout may also be set at this
position, as in this example:
(?(?C9)(?=a)abc|def)
@@ -8901,26 +8626,25 @@ CALLOUTS
Callouts with string arguments
- A delimited string may be used instead of a number as a callout argu-
- ment. The starting delimiter must be one of ` ' " ^ % # $ { and the
+ A delimited string may be used instead of a number as a callout argu-
+ ment. The starting delimiter must be one of ` ' " ^ % # $ { and the
ending delimiter is the same as the start, except for {, where the end-
- ing delimiter is }. If the ending delimiter is needed within the
+ ing delimiter is }. If the ending delimiter is needed within the
string, it must be doubled. For example:
(?C'ab ''c'' d')xyz(?C{any text})pqr
- The doubling is removed before the string is passed to the callout
+ The doubling is removed before the string is passed to the callout
function.
BACKTRACKING CONTROL
- There are a number of special "Backtracking Control Verbs" (to use
- Perl's terminology) that modify the behaviour of backtracking during
- matching. They are generally of the form (*VERB) or (*VERB:NAME). Some
- verbs take either form, possibly behaving differently depending on
- whether or not a name is present. The names are not required to be
- unique within the pattern.
+ There are a number of special "Backtracking Control Verbs" (to use
+ Perl's terminology) that modify the behaviour of backtracking during
+ matching. They are generally of the form (*VERB) or (*VERB:NAME). Some
+ verbs take either form, possibly behaving differently depending on
+ whether or not a name is present.
By default, for compatibility with Perl, a name is any sequence of
characters that does not include a closing parenthesis. The name is not
@@ -8956,8 +8680,8 @@ BACKTRACKING CONTROL
by the DFA matching function.
The behaviour of these verbs in repeated groups, assertions, and in
- capture groups called as subroutines (whether or not recursively) is
- documented below.
+ subpatterns called as subroutines (whether or not recursively) is docu-
+ mented below.
Optimizations that affect backtracking verbs
@@ -8982,8 +8706,8 @@ BACKTRACKING CONTROL
(*ACCEPT) or (*ACCEPT:NAME)
This verb causes the match to end successfully, skipping the remainder
- of the pattern. However, when it is inside a capture group that is
- called as a subroutine, only that group is ended successfully. Matching
+ of the pattern. However, when it is inside a subpattern that is called
+ as a subroutine, only that subpattern is ended successfully. Matching
then continues at the outer level. If (*ACCEPT) is triggered in a posi-
tive assertion, the assertion succeeds; in a negative assertion, the
assertion fails.
@@ -8996,10 +8720,6 @@ BACKTRACKING CONTROL
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
tured by the outer parentheses.
- Warning: (*ACCEPT) should not be used within a script run group,
- because it causes an immediate exit from the group, bypassing the
- script run checking.
-
(*FAIL) or (*FAIL:NAME)
This verb causes a matching failure, forcing backtracking to occur. It
@@ -9014,8 +8734,8 @@ BACKTRACKING CONTROL
A match with the string "aaaa" always fails, but the callout is taken
before each backtrack happens (in this example, 10 times).
- (*ACCEPT:NAME) and (*FAIL:NAME) are treated as (*MARK:NAME)(*ACCEPT)
- and (*MARK:NAME)(*FAIL), respectively.
+ (*ACCEPT:NAME) and (*FAIL:NAME) behave exactly the same as
+ (*MARK:NAME)(*ACCEPT) and (*MARK:NAME)(*FAIL), respectively.
Recording which path was taken
@@ -9025,21 +8745,24 @@ BACKTRACKING CONTROL
(*MARK:NAME) or (*:NAME)
- A name is always required with this verb. For all the other backtrack-
- ing control verbs, a NAME argument is optional.
+ A name is always required with this verb. There may be as many
+ instances of (*MARK) as you like in a pattern, and their names do not
+ have to be unique.
- When a match succeeds, the name of the last-encountered mark name on
+ When a match succeeds, the name of the last-encountered (*MARK:NAME) on
the matching path is passed back to the caller as described in the sec-
tion entitled "Other information about the match" in the pcre2api docu-
- mentation. This applies to all instances of (*MARK) and other verbs,
- including those inside assertions and atomic groups. However, there are
- differences in those cases when (*MARK) is used in conjunction with
- (*SKIP) as described below.
+ mentation. This applies to all instances of (*MARK), including those
+ inside assertions and atomic groups. (There are differences in those
+ cases when (*MARK) is used in conjunction with (*SKIP) as described
+ below.)
+
+ As well as (*MARK), the (*COMMIT), (*PRUNE) and (*THEN) verbs may have
+ associated NAME arguments. Whichever is last on the matching path is
+ passed back. See below for more details of these other verbs.
- The mark name that was last encountered on the matching path is passed
- back. A verb without a NAME argument is ignored for this purpose. Here
- is an example of pcre2test output, where the "mark" modifier requests
- the retrieval and outputting of (*MARK) data:
+ Here is an example of pcre2test output, where the "mark" modifier
+ requests the retrieval and outputting of (*MARK) data:
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XY
@@ -9050,76 +8773,76 @@ BACKTRACKING CONTROL
MK: B
The (*MARK) name is tagged with "MK:" in this output, and in this exam-
- ple it indicates which of the two alternatives matched. This is a more
- efficient way of obtaining this information than putting each alterna-
+ ple it indicates which of the two alternatives matched. This is a more
+ efficient way of obtaining this information than putting each alterna-
tive in its own capturing parentheses.
- If a verb with a name is encountered in a positive assertion that is
- true, the name is recorded and passed back if it is the last-encoun-
+ If a verb with a name is encountered in a positive assertion that is
+ true, the name is recorded and passed back if it is the last-encoun-
tered. This does not happen for negative assertions or failing positive
assertions.
- After a partial match or a failed match, the last encountered name in
+ After a partial match or a failed match, the last encountered name in
the entire match process is returned. For example:
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XP
No match, mark = B
- Note that in this unanchored example the mark is retained from the
+ Note that in this unanchored example the mark is retained from the
match attempt that started at the letter "X" in the subject. Subsequent
match attempts starting at "P" and then with an empty string do not get
as far as the (*MARK) item, but nevertheless do not reset it.
- If you are interested in (*MARK) values after failed matches, you
- should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to
+ If you are interested in (*MARK) values after failed matches, you
+ should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to
ensure that the match is always attempted.
Verbs that act after backtracking
The following verbs do nothing when they are encountered. Matching con-
- tinues with what follows, but if there is a subsequent match failure,
- causing a backtrack to the verb, a failure is forced. That is, back-
- tracking cannot pass to the left of the verb. However, when one of
+ tinues with what follows, but if there is a subsequent match failure,
+ causing a backtrack to the verb, a failure is forced. That is, back-
+ tracking cannot pass to the left of the verb. However, when one of
these verbs appears inside an atomic group or in a lookaround assertion
- that is true, its effect is confined to that group, because once the
- group has been matched, there is never any backtracking into it. Back-
+ that is true, its effect is confined to that group, because once the
+ group has been matched, there is never any backtracking into it. Back-
tracking from beyond an assertion or an atomic group ignores the entire
- group, and seeks a preceding backtracking point.
+ group, and seeks a preceding backtracking point.
- These verbs differ in exactly what kind of failure occurs when back-
- tracking reaches them. The behaviour described below is what happens
- when the verb is not in a subroutine or an assertion. Subsequent sec-
+ These verbs differ in exactly what kind of failure occurs when back-
+ tracking reaches them. The behaviour described below is what happens
+ when the verb is not in a subroutine or an assertion. Subsequent sec-
tions cover these special cases.
(*COMMIT) or (*COMMIT:NAME)
- This verb causes the whole match to fail outright if there is a later
+ This verb causes the whole match to fail outright if there is a later
matching failure that causes backtracking to reach it. Even if the pat-
- tern is unanchored, no further attempts to find a match by advancing
- the starting point take place. If (*COMMIT) is the only backtracking
+ tern is unanchored, no further attempts to find a match by advancing
+ the starting point take place. If (*COMMIT) is the only backtracking
verb that is encountered, once it has been passed pcre2_match() is com-
mitted to finding a match at the current starting point, or not at all.
For example:
a+(*COMMIT)b
- This matches "xxaab" but not "aacaab". It can be thought of as a kind
+ This matches "xxaab" but not "aacaab". It can be thought of as a kind
of dynamic anchor, or "I've started, so I must finish."
- The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM-
- MIT). It is like (*MARK:NAME) in that the name is remembered for pass-
- ing back to the caller. However, (*SKIP:NAME) searches only for names
- that are set with (*MARK), ignoring those set by any of the other back-
- tracking verbs.
+ The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM-
+ MIT). It is like (*MARK:NAME) in that the name is remembered for pass-
+ ing back to the caller. However, (*SKIP:NAME) searches only for names
+ set with (*MARK), ignoring those set by (*COMMIT), (*PRUNE) and
+ (*THEN).
- If there is more than one backtracking verb in a pattern, a different
- one that follows (*COMMIT) may be triggered first, so merely passing
+ If there is more than one backtracking verb in a pattern, a different
+ one that follows (*COMMIT) may be triggered first, so merely passing
(*COMMIT) during a match does not always guarantee that a match must be
at this starting point.
- Note that (*COMMIT) at the start of a pattern is not the same as an
- anchor, unless PCRE2's start-of-match optimizations are turned off, as
+ Note that (*COMMIT) at the start of a pattern is not the same as an
+ anchor, unless PCRE2's start-of-match optimizations are turned off, as
shown in this output from pcre2test:
re> /(*COMMIT)abc/
@@ -9130,63 +8853,63 @@ BACKTRACKING CONTROL
data> xyzabc
No match
- For the first pattern, PCRE2 knows that any match must start with "a",
- so the optimization skips along the subject to "a" before applying the
- pattern to the first set of data. The match attempt then succeeds. The
- second pattern disables the optimization that skips along to the first
- character. The pattern is now applied starting at "x", and so the
- (*COMMIT) causes the match to fail without trying any other starting
+ For the first pattern, PCRE2 knows that any match must start with "a",
+ so the optimization skips along the subject to "a" before applying the
+ pattern to the first set of data. The match attempt then succeeds. The
+ second pattern disables the optimization that skips along to the first
+ character. The pattern is now applied starting at "x", and so the
+ (*COMMIT) causes the match to fail without trying any other starting
points.
(*PRUNE) or (*PRUNE:NAME)
- This verb causes the match to fail at the current starting position in
+ This verb causes the match to fail at the current starting position in
the subject if there is a later matching failure that causes backtrack-
- ing to reach it. If the pattern is unanchored, the normal "bumpalong"
- advance to the next starting character then happens. Backtracking can
- occur as usual to the left of (*PRUNE), before it is reached, or when
- matching to the right of (*PRUNE), but if there is no match to the
- right, backtracking cannot cross (*PRUNE). In simple cases, the use of
- (*PRUNE) is just an alternative to an atomic group or possessive quan-
+ ing to reach it. If the pattern is unanchored, the normal "bumpalong"
+ advance to the next starting character then happens. Backtracking can
+ occur as usual to the left of (*PRUNE), before it is reached, or when
+ matching to the right of (*PRUNE), but if there is no match to the
+ right, backtracking cannot cross (*PRUNE). In simple cases, the use of
+ (*PRUNE) is just an alternative to an atomic group or possessive quan-
tifier, but there are some uses of (*PRUNE) that cannot be expressed in
- any other way. In an anchored pattern (*PRUNE) has the same effect as
+ any other way. In an anchored pattern (*PRUNE) has the same effect as
(*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back
- to the caller. However, (*SKIP:NAME) searches only for names set with
- (*MARK), ignoring those set by other backtracking verbs.
+ to the caller. However, (*SKIP:NAME) searches only for names set with
+ (*MARK), ignoring those set by (*COMMIT), (*PRUNE) or (*THEN).
(*SKIP)
- This verb, when given without a name, is like (*PRUNE), except that if
- the pattern is unanchored, the "bumpalong" advance is not to the next
+ This verb, when given without a name, is like (*PRUNE), except that if
+ the pattern is unanchored, the "bumpalong" advance is not to the next
character, but to the position in the subject where (*SKIP) was encoun-
- tered. (*SKIP) signifies that whatever text was matched leading up to
- it cannot be part of a successful match if there is a later mismatch.
+ tered. (*SKIP) signifies that whatever text was matched leading up to
+ it cannot be part of a successful match if there is a later mismatch.
Consider:
a+(*SKIP)b
- If the subject is "aaaac...", after the first match attempt fails
- (starting at the first character in the string), the starting point
+ If the subject is "aaaac...", after the first match attempt fails
+ (starting at the first character in the string), the starting point
skips on to start the next attempt at "c". Note that a possessive quan-
- tifier does not have the same effect as this example; although it would
- suppress backtracking during the first match attempt, the second
- attempt would start at the second character instead of skipping on to
+ tifier does not have the same effect as this example; although it would
+ suppress backtracking during the first match attempt, the second
+ attempt would start at the second character instead of skipping on to
"c".
(*SKIP:NAME)
- When (*SKIP) has an associated name, its behaviour is modified. When
- such a (*SKIP) is triggered, the previous path through the pattern is
- searched for the most recent (*MARK) that has the same name. If one is
- found, the "bumpalong" advance is to the subject position that corre-
- sponds to that (*MARK) instead of to where (*SKIP) was encountered. If
+ When (*SKIP) has an associated name, its behaviour is modified. When
+ such a (*SKIP) is triggered, the previous path through the pattern is
+ searched for the most recent (*MARK) that has the same name. If one is
+ found, the "bumpalong" advance is to the subject position that corre-
+ sponds to that (*MARK) instead of to where (*SKIP) was encountered. If
no (*MARK) with a matching name is found, the (*SKIP) is ignored.
- The search for a (*MARK) name uses the normal backtracking mechanism,
- which means that it does not see (*MARK) settings that are inside
+ The search for a (*MARK) name uses the normal backtracking mechanism,
+ which means that it does not see (*MARK) settings that are inside
atomic groups or assertions, because they are never re-entered by back-
tracking. Compare the following pcre2test examples:
@@ -9200,18 +8923,19 @@ BACKTRACKING CONTROL
0: b
1: b
- In the first example, the (*MARK) setting is in an atomic group, so it
+ In the first example, the (*MARK) setting is in an atomic group, so it
is not seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored.
- This allows the second branch of the pattern to be tried at the first
- character position. In the second example, the (*MARK) setting is not
- in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it
+ This allows the second branch of the pattern to be tried at the first
+ character position. In the second example, the (*MARK) setting is not
+ in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it
backtracks, and this causes a new matching attempt to start at the sec-
- ond character. This time, the (*MARK) is never seen because "a" does
+ ond character. This time, the (*MARK) is never seen because "a" does
not match "b", so the matcher immediately jumps to the second branch of
the pattern.
- Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
- ignores names that are set by other backtracking verbs.
+ Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
+ ignores names that are set by (*COMMIT:NAME), (*PRUNE:NAME) or
+ (*THEN:NAME).
(*THEN) or (*THEN:NAME)
@@ -9233,12 +8957,12 @@ BACKTRACKING CONTROL
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
It is like (*MARK:NAME) in that the name is remembered for passing back
to the caller. However, (*SKIP:NAME) searches only for names set with
- (*MARK), ignoring those set by other backtracking verbs.
+ (*MARK), ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
- A group that does not contain a | character is just a part of the
+ A subpattern that does not contain a | character is just a part of the
enclosing alternative; it is not a nested alternation with only one
- alternative. The effect of (*THEN) extends beyond such a group to the
- enclosing alternative. Consider this pattern, where A, B, etc. are
+ alternative. The effect of (*THEN) extends beyond such a subpattern to
+ the enclosing alternative. Consider this pattern, where A, B, etc. are
complex pattern fragments that do not contain any | characters at this
level:
@@ -9246,20 +8970,20 @@ BACKTRACKING CONTROL
If A and B are matched, but there is a failure in C, matching does not
backtrack into A; instead it moves to the next alternative, that is, D.
- However, if the group containing (*THEN) is given an alternative, it
- behaves differently:
+ However, if the subpattern containing (*THEN) is given an alternative,
+ it behaves differently:
A (B(*THEN)C | (*FAIL)) | D
- The effect of (*THEN) is now confined to the inner group. After a fail-
- ure in C, matching moves to (*FAIL), which causes the whole group to
- fail because there are no more alternatives to try. In this case,
- matching does backtrack into A.
+ The effect of (*THEN) is now confined to the inner subpattern. After a
+ failure in C, matching moves to (*FAIL), which causes the whole subpat-
+ tern to fail because there are no more alternatives to try. In this
+ case, matching does now backtrack into A.
- Note that a conditional group is not considered as having two alterna-
- tives, because only one is ever used. In other words, the | character
- in a conditional group has a different meaning. Ignoring white space,
- consider:
+ Note that a conditional subpattern is not considered as having two
+ alternatives, because only one is ever used. In other words, the |
+ character in a conditional subpattern has a different meaning. Ignoring
+ white space, consider:
^.*? (?(?=a) a | b(*THEN)c )
@@ -9267,10 +8991,10 @@ BACKTRACKING CONTROL
ungreedy, it initially matches zero characters. The condition (?=a)
then fails, the character "b" is matched, but "c" is not. At this
point, matching does not backtrack to .*? as might perhaps be expected
- from the presence of the | character. The conditional group is part of
- the single alternative that comprises the whole pattern, and so the
- match fails. (If there was a backtrack into .*?, allowing it to match
- "b", the match would succeed.)
+ from the presence of the | character. The conditional subpattern is
+ part of the single alternative that comprises the whole pattern, and so
+ the match fails. (If there was a backtrack into .*?, allowing it to
+ match "b", the match would succeed.)
The verbs just described provide four different "strengths" of control
when subsequent matching fails. (*THEN) is the weakest, carrying on the
@@ -9317,13 +9041,13 @@ BACKTRACKING CONTROL
(*FAIL) in any assertion has its normal effect: it forces an immediate
backtrack. The behaviour of the other backtracking verbs depends on
whether or not the assertion is standalone or acting as the condition
- in a conditional group.
+ in a conditional subpattern.
(*ACCEPT) in a standalone positive assertion causes the assertion to
- succeed without any further processing; captured strings and a mark
+ succeed without any further processing; captured strings and a (*MARK)
name (if set) are retained. In a standalone negative assertion,
(*ACCEPT) causes the assertion to fail without any further processing;
- captured substrings and any mark name are discarded.
+ captured substrings and any (*MARK) name are discarded.
If the assertion is a condition, (*ACCEPT) causes the condition to be
true for a positive assertion and false for a negative one; captured
@@ -9350,24 +9074,26 @@ BACKTRACKING CONTROL
Backtracking verbs in subroutines
- These behaviours occur whether or not the group is called recursively.
+ These behaviours occur whether or not the subpattern is called recur-
+ sively.
- (*ACCEPT) in a group called as a subroutine causes the subroutine match
- to succeed without any further processing. Matching then continues
- after the subroutine call. Perl documents this behaviour. Perl's treat-
- ment of the other verbs in subroutines is different in some cases.
+ (*ACCEPT) in a subpattern called as a subroutine causes the subroutine
+ match to succeed without any further processing. Matching then contin-
+ ues after the subroutine call. Perl documents this behaviour. Perl's
+ treatment of the other verbs in subroutines is different in some cases.
- (*FAIL) in a group called as a subroutine has its normal effect: it
- forces an immediate backtrack.
+ (*FAIL) in a subpattern called as a subroutine has its normal effect:
+ it forces an immediate backtrack.
- (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail
- when triggered by being backtracked to in a group called as a subrou-
- tine. There is then a backtrack at the outer level.
+ (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail
+ when triggered by being backtracked to in a subpattern called as a sub-
+ routine. There is then a backtrack at the outer level.
(*THEN), when triggered, skips to the next alternative in the innermost
- enclosing group that has alternatives (its normal behaviour). However,
- if there is no such group within the subroutine's group, the subroutine
- match fails and there is a backtrack at the outer level.
+ enclosing group within the subpattern that has alternatives (its normal
+ behaviour). However, if there is no such group within the subroutine
+ subpattern, the subroutine match fails and there is a backtrack at the
+ outer level.
SEE ALSO
@@ -9385,8 +9111,8 @@ AUTHOR
REVISION
- Last updated: 12 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 04 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -9410,9 +9136,9 @@ COMPILED PATTERN MEMORY USAGE
code, so that most simple patterns do not use much memory for storing
the compiled version. However, there is one case where the memory usage
of a compiled pattern can be unexpectedly large. If a parenthesized
- group has a quantifier with a minimum greater than 1 and/or a limited
- maximum, the whole group is repeated in the compiled code. For example,
- the pattern
+ subpattern has a quantifier with a minimum greater than 1 and/or a lim-
+ ited maximum, the whole subpattern is repeated in the compiled code.
+ For example, the pattern
(abc|def){2,4}
@@ -9620,8 +9346,8 @@ AUTHOR
REVISION
- Last updated: 03 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 25 April 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -9636,56 +9362,33 @@ SYNOPSIS
#include <pcre2posix.h>
- int pcre2_regcomp(regex_t *preg, const char *pattern,
+ int regcomp(regex_t *preg, const char *pattern,
int cflags);
- int pcre2_regexec(const regex_t *preg, const char *string,
+ int regexec(const regex_t *preg, const char *string,
size_t nmatch, regmatch_t pmatch[], int eflags);
- size_t pcre2_regerror(int errcode, const regex_t *preg,
+ size_t regerror(int errcode, const regex_t *preg,
char *errbuf, size_t errbuf_size);
- void pcre2_regfree(regex_t *preg);
+ void regfree(regex_t *preg);
DESCRIPTION
This set of functions provides a POSIX-style API for the PCRE2 regular
- expression 8-bit library. There are no POSIX-style wrappers for PCRE2's
- 16-bit and 32-bit libraries. See the pcre2api documentation for a
- description of PCRE2's native API, which contains much additional func-
- tionality.
-
- The functions described here are wrapper functions that ultimately call
- the PCRE2 native API. Their prototypes are defined in the pcre2posix.h
- header file, and they all have unique names starting with pcre2_. How-
- ever, the pcre2posix.h header also contains macro definitions that con-
- vert the standard POSIX names such as regcomp() into pcre2_regcomp() etc.
- This means that a program can use the usual POSIX names without running
- the risk of accidentally linking with POSIX functions from a different
- library.
-
- On Unix-like systems the PCRE2 POSIX library is called libpcre2-posix,
- so can be accessed by adding -lpcre2-posix to the command for linking
- an application. Because the POSIX functions call the native ones, it is
- also necessary to add -lpcre2-8.
-
- Although they are not defined as prototypes in pcre2posix.h, the library
- does contain functions with the POSIX names regcomp() etc. These simply
- pass their arguments to the PCRE2 functions. These functions are pro-
- vided for backwards compatibility with earlier versions of PCRE2, so
- that existing programs do not have to be recompiled.
-
- Calling the header file pcre2posix.h avoids any conflict with other
- POSIX libraries. It can, of course, be renamed or aliased as regex.h,
- which is the "correct" name, if there is no clash. It provides two
- structure types, regex_t for compiled internal forms, and regmatch_t
- for returning captured substrings. It also defines some constants whose
- names start with "REG_"; these are used for setting options and identi-
- fying error codes.
-
-
-USING THE POSIX FUNCTIONS
+ expression 8-bit library. See the pcre2api documentation for a descrip-
+ tion of PCRE2's native API, which contains much additional functional-
+ ity. There are no POSIX-style wrappers for PCRE2's 16-bit and 32-bit
+ libraries.
+
+ The functions described here are just wrapper functions that ultimately
+ call the PCRE2 native API. Their prototypes are defined in the
+ pcre2posix.h header file, and on Unix systems the library itself is
+ called libpcre2-posix.a, so can be accessed by adding -lpcre2-posix to
+ the command for linking an application that uses them. Because the
+ POSIX functions call the native ones, it is also necessary to add
+ -lpcre2-8.
Those POSIX option bits that can reasonably be mapped to PCRE2 native
options have been implemented. In addition, the option REG_EXTENDED is
@@ -9707,19 +9410,23 @@ USING THE POSIX FUNCTIONS
POSIX-compatible, and in multi-unit encoding domains it is probably
even less compatible.
- The descriptions below use the actual names of the functions, but, as
- described above, the standard POSIX names (without the pcre2_ prefix)
- may also be used.
+ The header for these functions is supplied as pcre2posix.h to avoid any
+ potential clash with other POSIX libraries. It can, of course, be
+ renamed or aliased as regex.h, which is the "correct" name. It provides
+ two structure types, regex_t for compiled internal forms, and reg-
+ match_t for returning captured substrings. It also defines some con-
+ stants whose names start with "REG_"; these are used for setting
+ options and identifying error codes.
COMPILING A PATTERN
- The function pcre2_regcomp() is called to compile a pattern into an
- internal form. By default, the pattern is a C string terminated by a
- binary zero (but see REG_PEND below). The preg argument is a pointer to
- a regex_t structure that is used as a base for storing information
- about the compiled regular expression. (It is also used for input when
- REG_PEND is set.)
+ The function regcomp() is called to compile a pattern into an internal
+ form. By default, the pattern is a C string terminated by a binary zero
+ (but see REG_PEND below). The preg argument is a pointer to a regex_t
+ structure that is used as a base for storing information about the com-
+ piled regular expression. (It is also used for input when REG_PEND is
+ set.)
The argument cflags is either zero, or contains one or more of the bits
defined by the following macros:
@@ -9753,23 +9460,21 @@ COMPILING A PATTERN
REG_NOSUB
- When a pattern that is compiled with this flag is passed to
- pcre2_regexec() for matching, the nmatch and pmatch arguments are
- ignored, and no captured strings are returned. Versions of the PCRE
- library prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile
- option, but this no longer happens because it disables the use of back-
- references.
+ When a pattern that is compiled with this flag is passed to regexec()
+ for matching, the nmatch and pmatch arguments are ignored, and no cap-
+ tured strings are returned. Versions of the PCRE library prior to 10.22
+ used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no
+ longer happens because it disables the use of backreferences.
REG_PEND
- If this option is set, the re_endp field in the preg structure (which
+ If this option is set, the re_endp field in the preg structure (which
has the type const char *) must be set to point to the character beyond
- the end of the pattern before calling pcre2_regcomp(). The pattern
- itself may now contain binary zeros, which are treated as data charac-
- ters. Without REG_PEND, a binary zero terminates the pattern and the
- re_endp field is ignored. This is a GNU extension to the POSIX standard
- and should be used with caution in software intended to be portable to
- other systems.
+ the end of the pattern before calling regcomp(). The pattern itself may
+ now contain binary zeros, which are treated as data characters. Without
+ REG_PEND, a binary zero terminates the pattern and the re_endp field is
+ ignored. This is a GNU extension to the POSIX standard and should be
+ used with caution in software intended to be portable to other systems.
REG_UCP
@@ -9799,24 +9504,23 @@ COMPILING A PATTERN
It does not affect the way newlines are matched by the dot metacharac-
ter (they are not) or by a negative class such as [^a] (they are).
- The yield of pcre2_regcomp() is zero on success, and non-zero other-
- wise. The preg structure is filled in on success, and one other member
- of the structure (as well as re_endp) is public: re_nsub contains the
- number of capturing subpatterns in the regular expression. Various
- error codes are defined in the header file.
+ The yield of regcomp() is zero on success, and non-zero otherwise. The
+ preg structure is filled in on success, and one other member of the
+ structure (as well as re_endp) is public: re_nsub contains the number
+ of capturing subpatterns in the regular expression. Various error codes
+ are defined in the header file.
- NOTE: If the yield of pcre2_regcomp() is non-zero, you must not attempt
- to use the contents of the preg structure. If, for example, you pass it
- to pcre2_regexec(), the result is undefined and your program is likely
- to crash.
+ NOTE: If the yield of regcomp() is non-zero, you must not attempt to
+ use the contents of the preg structure. If, for example, you pass it to
+ regexec(), the result is undefined and your program is likely to crash.
MATCHING NEWLINE CHARACTERS
This area is not simple, because POSIX and Perl take different views of
- things. It is not possible to get PCRE2 to obey POSIX semantics, but
+ things. It is not possible to get PCRE2 to obey POSIX semantics, but
then PCRE2 was never intended to be a POSIX engine. The following table
- lists the different possibilities for matching newline characters in
+ lists the different possibilities for matching newline characters in
Perl and PCRE2:
Default Change with
@@ -9837,25 +9541,25 @@ MATCHING NEWLINE CHARACTERS
$ matches \n in middle no REG_NEWLINE
^ matches \n in middle no REG_NEWLINE
- This behaviour is not what happens when PCRE2 is called via its POSIX
- API. By default, PCRE2's behaviour is the same as Perl's, except that
- there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2
+ This behaviour is not what happens when PCRE2 is called via its POSIX
+ API. By default, PCRE2's behaviour is the same as Perl's, except that
+ there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2
and Perl, there is no way to stop newline from matching [^a].
- Default POSIX newline handling can be obtained by setting PCRE2_DOTALL
- and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but
- there is no way to make PCRE2 behave exactly as for the REG_NEWLINE
- action. When using the POSIX API, passing REG_NEWLINE to PCRE2's
- pcre2_regcomp() function causes PCRE2_MULTILINE to be passed to
- pcre2_compile(), and REG_DOTALL passes PCRE2_DOTALL. There is no way to
- pass PCRE2_DOLLAR_ENDONLY.
+ Default POSIX newline handling can be obtained by setting PCRE2_DOTALL
+ and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but
+ there is no way to make PCRE2 behave exactly as for the REG_NEWLINE
+ action. When using the POSIX API, passing REG_NEWLINE to PCRE2's reg-
+ comp() function causes PCRE2_MULTILINE to be passed to pcre2_compile(),
+ and REG_DOTALL passes PCRE2_DOTALL. There is no way to pass PCRE2_DOL-
+ LAR_ENDONLY.
MATCHING A PATTERN
- The function pcre2_regexec() is called to match a compiled pattern preg
- against a given string, which is by default terminated by a zero byte
- (but see REG_STARTEND below), subject to the options in eflags. These
+ The function regexec() is called to match a compiled pattern preg
+ against a given string, which is by default terminated by a zero byte
+ (but see REG_STARTEND below), subject to the options in eflags. These
can be:
REG_NOTBOL
@@ -9865,9 +9569,9 @@ MATCHING A PATTERN
REG_NOTEMPTY
- The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2
- matching function. Note that REG_NOTEMPTY is not part of the POSIX
- standard. However, setting this option can give more POSIX-like behav-
+ The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2
+ matching function. Note that REG_NOTEMPTY is not part of the POSIX
+ standard. However, setting this option can give more POSIX-like behav-
iour in some situations.
REG_NOTEOL
@@ -9877,30 +9581,29 @@ MATCHING A PATTERN
REG_STARTEND
- When this option is set, the subject string starts at string +
- pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which should
- point to the first character beyond the string. There may be binary
- zeros within the subject string, and indeed, using REG_STARTEND is the
+ When this option is set, the subject string starts at string +
+ pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which should
+ point to the first character beyond the string. There may be binary
+ zeros within the subject string, and indeed, using REG_STARTEND is the
only way to pass a subject string that contains a binary zero.
- Whatever the value of pmatch[0].rm_so, the offsets of the matched
- string and any captured substrings are still given relative to the
- start of string itself. (Before PCRE2 release 10.30 these were given
- relative to string + pmatch[0].rm_so, but this differs from other
+ Whatever the value of pmatch[0].rm_so, the offsets of the matched
+ string and any captured substrings are still given relative to the
+ start of string itself. (Before PCRE2 release 10.30 these were given
+ relative to string + pmatch[0].rm_so, but this differs from other
implementations.)
- This is a BSD extension, compatible with but not specified by IEEE
- Standard 1003.2 (POSIX.2), and should be used with caution in software
- intended to be portable to other systems. Note that a non-zero rm_so
- does not imply REG_NOTBOL; REG_STARTEND affects only the location and
- length of the string, not how it is matched. Setting REG_STARTEND and
- passing pmatch as NULL are mutually exclusive; the error REG_INVARG is
+ This is a BSD extension, compatible with but not specified by IEEE
+ Standard 1003.2 (POSIX.2), and should be used with caution in software
+ intended to be portable to other systems. Note that a non-zero rm_so
+ does not imply REG_NOTBOL; REG_STARTEND affects only the location and
+ length of the string, not how it is matched. Setting REG_STARTEND and
+ passing pmatch as NULL are mutually exclusive; the error REG_INVARG is
returned.
- If the pattern was compiled with the REG_NOSUB flag, no data about any
- matched strings is returned. The nmatch and pmatch arguments of
- pcre2_regexec() are ignored (except possibly as input for REG_STAR-
- TEND).
+ If the pattern was compiled with the REG_NOSUB flag, no data about any
+ matched strings is returned. The nmatch and pmatch arguments of
+ regexec() are ignored (except possibly as input for REG_STARTEND).
The value of nmatch may be zero, and the value of pmatch may be NULL
(unless REG_STARTEND is set); in both these cases no data about any
@@ -9923,22 +9626,22 @@ MATCHING A PATTERN
ERROR MESSAGES
- The pcre2_regerror() function maps a non-zero errorcode from either
- pcre2_regcomp() or pcre2_regexec() to a printable message. If preg is
- not NULL, the error should have arisen from the use of that structure.
- A message terminated by a binary zero is placed in errbuf. If the buf-
- fer is too short, only the first errbuf_size - 1 characters of the
- error message are used. The yield of the function is the size of buffer
- needed to hold the whole message, including the terminating zero. This
- value is greater than errbuf_size if the message was truncated.
+ The regerror() function maps a non-zero errorcode from either regcomp()
+ or regexec() to a printable message. If preg is not NULL, the error
+ should have arisen from the use of that structure. A message terminated
+ by a binary zero is placed in errbuf. If the buffer is too short, only
+ the first errbuf_size - 1 characters of the error message are used. The
+ yield of the function is the size of buffer needed to hold the whole
+ message, including the terminating zero. This value is greater than
+ errbuf_size if the message was truncated.
MEMORY USAGE
Compiling a regular expression causes memory to be allocated and asso-
- ciated with the preg structure. The function pcre2_regfree() frees all
- such memory, after which preg may no longer be used as a compiled
- expression.
+ ciated with the preg structure. The function regfree() frees all such
+ memory, after which preg may no longer be used as a compiled expres-
+ sion.
AUTHOR
@@ -9950,8 +9653,8 @@ AUTHOR
REVISION
- Last updated: 30 January 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 15 June 2017
+ Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------
@@ -10256,8 +9959,7 @@ QUOTING
ESCAPED CHARACTERS
- This table applies to ASCII and Unicode environments. An unrecognized
- escape sequence causes an error.
+ This table applies to ASCII and Unicode environments.
\a alarm, that is, the BEL character (hex 07)
\cx "control-x", where x is any ASCII printing character
@@ -10269,32 +9971,26 @@ ESCAPED CHARACTERS
\0dd character with octal code 0dd
\ddd character with octal code ddd, or backreference
\o{ddd..} character with octal code ddd..
+ \U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
\N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
+ \uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
\xhh character with hex code hh
\x{hh..} character with hex code hh..
- If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the
- following are also recognized:
-
- \U the character "U"
- \uhhhh character with hex code hhhh
- \u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX
-
- When \x is not followed by {, from zero to two hexadecimal digits are
- read, but in ALT_BSUX mode \x must be followed by two hexadecimal dig-
- its to be recognized as a hexadecimal escape; otherwise it matches a
- literal "x". Likewise, if \u (in ALT_BSUX mode) is not followed by
- four hexadecimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex
- digits in curly brackets, it matches a literal "u".
-
Note that \0dd is always an octal code. The treatment of backslash fol-
- lowed by a non-zero digit is complicated; for details see the section
- "Non-printing characters" in the pcre2pattern documentation, where
- details of escape processing in EBCDIC environments are also given.
+ lowed by a non-zero digit is complicated; for details see the section
+ "Non-printing characters" in the pcre2pattern documentation, where
+ details of escape processing in EBCDIC environments are also given.
\N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not supported in
- EBCDIC environments. Note that \N not followed by an opening curly
+ EBCDIC environments. Note that \N not followed by an opening curly
bracket has a different meaning (see below).
+ When \x is not followed by {, from zero to two hexadecimal digits are
+ read, but if PCRE2_ALT_BSUX is set, \x must be followed by two hexadec-
+ imal digits to be recognized as a hexadecimal escape; otherwise it
+ matches a literal "x". Likewise, if \u (in ALT_BSUX mode) is not fol-
+ lowed by four hexadecimal digits, it matches a literal "u".
+
CHARACTER TYPES
@@ -10317,14 +10013,14 @@ CHARACTER TYPES
\W a "non-word" character
\X a Unicode extended grapheme cluster
- \C is dangerous because it may leave the current matching point in the
+ \C is dangerous because it may leave the current matching point in the
middle of a UTF-8 or UTF-16 character. The application can lock out the
- use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also
+ use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also
possible to build PCRE2 with the use of \C permanently disabled.
- By default, \d, \s, and \w match only ASCII characters, even in UTF-8
+ By default, \d, \s, and \w match only ASCII characters, even in UTF-8
mode or in the 16-bit and 32-bit libraries. However, if locale-specific
- matching is happening, \s and \w may also match characters with code
+ matching is happening, \s and \w may also match characters with code
points in the range 128-255. If the PCRE2_UCP option is set, the behav-
iour of these escape sequences is changed to use Unicode properties and
they match many more characters.
@@ -10393,28 +10089,28 @@ PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P
SCRIPT NAMES FOR \p AND \P
- Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
- nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
- Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
- nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
- Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
- Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek,
- Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
- Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
- Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan-
- nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao,
- Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha-
- jani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
+ Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
+ nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
+ Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
+ nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
+ Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
+ Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek,
+ Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
+ Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
+ Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan-
+ nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao,
+ Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha-
+ jani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
Medefaidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive,
- Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
- Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
- ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
- dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya,
+ Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
+ Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
+ ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
+ dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya,
Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
- Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
- vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
- Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
- Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
+ Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
+ vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
+ Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
+ Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
nagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi, Zanabazar_Square.
@@ -10441,8 +10137,8 @@ CHARACTER CLASSES
word same as \w
xdigit hexadecimal digit
- In PCRE2, POSIX character set names recognize only ASCII characters by
- default, but some of them use Unicode properties if PCRE2_UCP is set.
+ In PCRE2, POSIX character set names recognize only ASCII characters by
+ default, but some of them use Unicode properties if PCRE2_UCP is set.
You can use \Q...\E inside a character class.
@@ -10497,23 +10193,18 @@ ALTERNATION
CAPTURING
- (...) capture group
- (?<name>...) named capture group (Perl)
- (?'name'...) named capture group (Perl)
- (?P<name>...) named capture group (Python)
- (?:...) non-capture group
- (?|...) non-capture group; reset group numbers for
- capture groups in each alternative
-
- In non-UTF modes, names may contain underscores and ASCII letters and
- digits; in UTF modes, any Unicode letters and Unicode decimal digits
- are permitted. In both cases, a name must not start with a digit.
+ (...) capturing group
+ (?<name>...) named capturing group (Perl)
+ (?'name'...) named capturing group (Perl)
+ (?P<name>...) named capturing group (Python)
+ (?:...) non-capturing group
+ (?|...) non-capturing group; reset group numbers for
+ capturing groups in each alternative
ATOMIC GROUPS
- (?>...) atomic non-capture group
- (*atomic:...) atomic non-capture group
+ (?>...) atomic, non-capturing group
COMMENT
@@ -10522,7 +10213,7 @@ COMMENT
OPTION SETTING
- Changes of these options within a group are automatically cancelled at
+ Changes of these options within a group are automatically cancelled at
the end of the group.
(?i) caseless
@@ -10536,14 +10227,14 @@ OPTION SETTING
(?-...) unset option(s)
(?^) unset imnsx options
- Unsetting x or xx unsets both. Several options may be set at once, and
+ Unsetting x or xx unsets both. Several options may be set at once, and
a mixture of setting and unsetting such as (?i-x) is allowed, but there
may be only one hyphen. Setting (but not unsetting) is allowed after (?^,
for example (?^in). An option setting may appear at the start of a non-
- capture group, for example (?i:...).
+ capturing group, for example (?i:...).
- The following are recognized only at the very start of a pattern or
- after one of the newline or \R options with similar syntax. More than
+ The following are recognized only at the very start of a pattern or
+ after one of the newline or \R options with similar syntax. More than
one of them may appear. For the first three, d is a decimal number.
(*LIMIT_DEPTH=d) set the backtracking limit to d
@@ -10558,17 +10249,17 @@ OPTION SETTING
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
- Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the
- value of the limits set by the caller of pcre2_match() or
- pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete
+ Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the
+ value of the limits set by the caller of pcre2_match() or
+ pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete
synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF)
- and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
+ and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
respectively, at compile time.
NEWLINE CONVENTION
- These are recognized only at the very start of the pattern or after
+ These are recognized only at the very start of the pattern or after
option settings with a similar syntax.
(*CR) carriage return only
@@ -10581,7 +10272,7 @@ NEWLINE CONVENTION
WHAT \R MATCHES
- These are recognized only at the very start of the pattern or after
+ These are recognized only at the very start of the pattern or after
option setting with a similar syntax.
(*BSR_ANYCRLF) CR, LF, or CRLF
@@ -10590,32 +10281,12 @@ WHAT \R MATCHES
LOOKAHEAD AND LOOKBEHIND ASSERTIONS
- (?=...) )
- (*pla:...) ) positive lookahead
- (*positive_lookahead:...) )
-
- (?!...) )
- (*nla:...) ) negative lookahead
- (*negative_lookahead:...) )
-
- (?<=...) )
- (*plb:...) ) positive lookbehind
- (*positive_lookbehind:...) )
+ (?=...) positive look ahead
+ (?!...) negative look ahead
+ (?<=...) positive look behind
+ (?<!...) negative look behind
- (?<!...) )
- (*nlb:...) ) negative lookbehind
- (*negative_lookbehind:...) )
-
- Each top-level branch of a lookbehind must be of a fixed length.
-
-
-SCRIPT RUNS
-
- (*script_run:...) ) script run, can be backtracked into
- (*sr:...) )
-
- (*atomic_script_run:...) ) atomic script run
- (*asr:...) )
+ Each top-level branch of a look behind must be of a fixed length.
BACKREFERENCES
@@ -10637,19 +10308,19 @@ BACKREFERENCES
SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)
(?R) recurse whole pattern
- (?n) call subroutine by absolute number
- (?+n) call subroutine by relative number
- (?-n) call subroutine by relative number
- (?&name) call subroutine by name (Perl)
- (?P>name) call subroutine by name (Python)
- \g<name> call subroutine by name (Oniguruma)
- \g'name' call subroutine by name (Oniguruma)
- \g<n> call subroutine by absolute number (Oniguruma)
- \g'n' call subroutine by absolute number (Oniguruma)
- \g<+n> call subroutine by relative number (PCRE2 extension)
- \g'+n' call subroutine by relative number (PCRE2 extension)
- \g<-n> call subroutine by relative number (PCRE2 extension)
- \g'-n' call subroutine by relative number (PCRE2 extension)
+ (?n) call subpattern by absolute number
+ (?+n) call subpattern by relative number
+ (?-n) call subpattern by relative number
+ (?&name) call subpattern by name (Perl)
+ (?P>name) call subpattern by name (Python)
+ \g<name> call subpattern by name (Oniguruma)
+ \g'name' call subpattern by name (Oniguruma)
+ \g<n> call subpattern by absolute number (Oniguruma)
+ \g'n' call subpattern by absolute number (Oniguruma)
+ \g<+n> call subpattern by relative number (PCRE2 extension)
+ \g'+n' call subpattern by relative number (PCRE2 extension)
+ \g<-n> call subpattern by relative number (PCRE2 extension)
+ \g'-n' call subpattern by relative number (PCRE2 extension)
CONDITIONAL PATTERNS
@@ -10666,20 +10337,20 @@ CONDITIONAL PATTERNS
(?(R) overall recursion condition
(?(Rn) specific numbered group recursion condition
(?(R&name) specific named group recursion condition
- (?(DEFINE) define groups for reference
+ (?(DEFINE) define subpattern for reference
(?(VERSION[>]=n.m) test PCRE2 version
(?(assert) assertion condition
- Note the ambiguity of (?(R) and (?(Rn) which might be named reference
- conditions or recursion tests. Such a condition is interpreted as a
+ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
+ conditions or recursion tests. Such a condition is interpreted as a
reference condition if the relevant named group exists.
BACKTRACKING CONTROL
- All backtracking control verbs may be in the form (*VERB:NAME). For
- (*MARK) the name is mandatory, for the others it is optional. (*SKIP)
- changes its behaviour if :NAME is present. The others just set a name
+ All backtracking control verbs may be in the form (*VERB:NAME). For
+ (*MARK) the name is mandatory, for the others it is optional. (*SKIP)
+ changes its behaviour if :NAME is present. The others just set a name
for passing back to the caller, but this is not a name that (*SKIP) can
see. The following act immediately they are reached:
@@ -10687,7 +10358,7 @@ BACKTRACKING CONTROL
(*FAIL) force backtrack; synonym (*F)
(*MARK:NAME) set name to be passed back; synonym (*:NAME)
- The following act only when a subsequent match failure causes a back-
+ The following act only when a subsequent match failure causes a back-
track to reach them. They all force a match failure, but they differ in
what happens afterwards. Those that advance the start-of-match point do
so only if the pattern is not anchored.
@@ -10699,7 +10370,7 @@ BACKTRACKING CONTROL
(*MARK:NAME); if not found, the (*SKIP) is ignored
(*THEN) local failure, backtrack to next alternation
- The effect of one of these verbs in a group called as a subroutine is
+ The effect of one of these verbs in a group called as a subroutine is
confined to the subroutine call.
@@ -10710,14 +10381,14 @@ CALLOUTS
(?C"text") callout with string data
The allowed string delimiters are ` ' " ^ % # $ (which are the same for
- the start and the end), and the starting delimiter { matched with the
- ending delimiter }. To encode the ending delimiter within the string,
+ the start and the end), and the starting delimiter { matched with the
+ ending delimiter }. To encode the ending delimiter within the string,
double it.
SEE ALSO
- pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3),
+ pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3),
pcre2(3).
@@ -10730,8 +10401,8 @@ AUTHOR
REVISION
- Last updated: 11 February 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 02 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -10764,73 +10435,66 @@ UNICODE AND UTF SUPPORT
UNICODE PROPERTY SUPPORT
When PCRE2 is built with Unicode support, the escape sequences \p{..},
- \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF set-
- ting. The Unicode properties that can be tested are limited to the
- general category properties such as Lu for an upper case letter or Nd
- for a decimal number, the Unicode script names such as Arabic or Han,
- and the derived properties Any and L&. Full lists are given in the
- pcre2pattern and pcre2syntax documentation. Only the short names for
- properties are supported. For example, \p{L} matches a letter. Its Perl
- synonym, \p{Letter}, is not supported. Furthermore, in Perl, many
- properties may optionally be prefixed by "Is", for compatibility with
- Perl 5.6. PCRE2 does not support this.
+ \P{..}, and \X can be used. The Unicode properties that can be tested
+ are limited to the general category properties such as Lu for an upper
+ case letter or Nd for a decimal number, the Unicode script names such
+ as Arabic or Han, and the derived properties Any and L&. Full lists are
+ given in the pcre2pattern and pcre2syntax documentation. Only the short
+ names for properties are supported. For example, \p{L} matches a let-
+ ter. Its Perl synonym, \p{Letter}, is not supported. Furthermore, in
+ Perl, many properties may optionally be prefixed by "Is", for compati-
+ bility with Perl 5.6. PCRE2 does not support this.
WIDE CHARACTERS AND UTF MODES
Code points less than 256 can be specified in patterns by either braced
or unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3).
- Larger values have to use braced sequences. Unbraced octal code points
+ Larger values have to use braced sequences. Unbraced octal code points
up to \777 are also recognized; larger ones can be coded using \o{...}.
- The escape sequence \N{U+<hex digits>} is recognized as another way of
- specifying a Unicode character by code point in a UTF mode. It is not
+ The escape sequence \N{U+<hex digits>} is recognized as another way of
+ specifying a Unicode character by code point in a UTF mode. It is not
allowed in non-UTF modes.
- In UTF modes, repeat quantifiers apply to complete UTF characters, not
+ In UTF modes, repeat quantifiers apply to complete UTF characters, not
to individual code units.
- In UTF modes, the dot metacharacter matches one UTF character instead
+ In UTF modes, the dot metacharacter matches one UTF character instead
of a single code unit.
- In UTF modes, capture group names are not restricted to ASCII, and may
- contain any Unicode letters and decimal digits, as well as underscore.
-
The escape sequence \C can be used to match a single code unit in a UTF
mode, but its use can lead to some strange effects because it breaks up
multi-unit characters (see the description of \C in the pcre2pattern
- documentation). For this reason, there is a build-time option that dis-
- ables support for \C completely. There is also a less draconian com-
- pile-time option for locking out the use of \C when a pattern is com-
- piled.
+ documentation).
- The use of \C is not supported by the alternative matching function
+ The use of \C is not supported by the alternative matching function
pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a charac-
- ter may consist of more than one code unit. The use of \C in these
- modes provokes a match-time error. Also, the JIT optimization does not
+ ter may consist of more than one code unit. The use of \C in these
+ modes provokes a match-time error. Also, the JIT optimization does not
support \C in these modes. If JIT optimization is requested for a UTF-8
- or UTF-16 pattern that contains \C, it will not succeed, and so when
+ or UTF-16 pattern that contains \C, it will not succeed, and so when
pcre2_match() is called, the matching will be carried out by the normal
interpretive function.
The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
- characters of any code value, but, by default, the characters that
- PCRE2 recognizes as digits, spaces, or word characters remain the same
- set as in non-UTF mode, all with code points less than 256. This
- remains true even when PCRE2 is built to include Unicode support,
- because to do otherwise would slow down matching in many common cases.
- Note that this also applies to \b and \B, because they are defined in
- terms of \w and \W. If you want to test for a wider sense of, say,
- "digit", you can use explicit Unicode property tests such as \p{Nd}.
- Alternatively, if you set the PCRE2_UCP option, the way that the char-
- acter escapes work is changed so that Unicode properties are used to
+ characters of any code value, but, by default, the characters that
+ PCRE2 recognizes as digits, spaces, or word characters remain the same
+ set as in non-UTF mode, all with code points less than 256. This
+ remains true even when PCRE2 is built to include Unicode support,
+ because to do otherwise would slow down matching in many common cases.
+ Note that this also applies to \b and \B, because they are defined in
+ terms of \w and \W. If you want to test for a wider sense of, say,
+ "digit", you can use explicit Unicode property tests such as \p{Nd}.
+ Alternatively, if you set the PCRE2_UCP option, the way that the char-
+ acter escapes work is changed so that Unicode properties are used to
determine which characters match. There are more details in the section
on generic character types in the pcre2pattern documentation.
- Similarly, characters that match the POSIX named character classes are
+ Similarly, characters that match the POSIX named character classes are
all low-valued characters, unless the PCRE2_UCP option is set.
- However, the special horizontal and vertical white space matching
+ However, the special horizontal and vertical white space matching
escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char-
acters, whether or not PCRE2_UCP is set.
@@ -10840,156 +10504,35 @@ CASE-EQUIVALENCE IN UTF MODES
Case-insensitive matching in a UTF mode makes use of Unicode properties
except for characters whose code points are less than 128 and that have
at most two case-equivalent values. For these, a direct table lookup is
- used for speed. A few Unicode characters such as Greek sigma have more
+ used for speed. A few Unicode characters such as Greek sigma have more
than two code points that are case-equivalent, and these are treated as
such.
-SCRIPT RUNS
-
- The pattern constructs (*script_run:...) and (*atomic_script_run:...),
- with synonyms (*sr:...) and (*asr:...), verify that the string matched
- within the parentheses is a script run. In concept, a script run is a
- sequence of characters that are all from the same Unicode script. How-
- ever, because some scripts are commonly used together, and because some
- diacritical and other marks are used with multiple scripts, it is not
- that simple.
-
- Every Unicode character has a Script property, mostly with a value cor-
- responding to the name of a script, such as Latin, Greek, or Cyrillic.
- There are also three special values:
-
- "Unknown" is used for code points that have not been assigned, and also
- for the surrogate code points. In the PCRE2 32-bit library, characters
- whose code points are greater than the Unicode maximum (U+10FFFF),
- which are accessible only in non-UTF mode, are assigned the Unknown
- script.
-
- "Common" is used for characters that are used with many scripts. These
- include punctuation, emoji, mathematical, musical, and currency sym-
- bols, and the ASCII digits 0 to 9.
-
- "Inherited" is used for characters such as diacritical marks that mod-
- ify a previous character. These are considered to take on the script of
- the character that they modify.
-
- Some Inherited characters are used with many scripts, but many of them
- are only normally used with a small number of scripts. For example,
- U+102E0 (Coptic Epact thousands mark) is used only with Arabic and Cop-
- tic. In order to make it possible to check this, a Unicode property
- called Script Extension exists. Its value is a list of scripts that
- apply to the character. For the majority of characters, the list con-
- tains just one script, the same one as the Script property. However,
- for characters such as U+102E0 more than one Script is listed. There
- are also some Common characters that have a single, non-Common script
- in their Script Extension list.
-
- The next section describes the basic rules for deciding whether a given
- string of characters is a script run. Note, however, that there are
- some special cases involving the Chinese Han script, and an additional
- constraint for decimal digits. These are covered in subsequent sec-
- tions.
-
- Basic script run rules
-
- A string that is less than two characters long is a script run. This is
- the only case in which an Unknown character can be part of a script
- run. Longer strings are checked using only the Script Extensions prop-
- erty, not the basic Script property.
-
- If a character's Script Extension property is the single value "Inher-
- ited", it is always accepted as part of a script run. This is also true
- for the property "Common", subject to the checking of decimal digits
- described below. All the remaining characters in a script run must have
- at least one script in common in their Script Extension lists. In set-
- theoretic terminology, the intersection of all the sets of scripts must
- not be empty.
-
- A simple example is an Internet name such as "google.com". The letters
- are all in the Latin script, and the dot is Common, so this string is a
- script run. However, the Cyrillic letter "o" looks exactly the same as
- the Latin "o"; a string that looks the same, but with Cyrillic "o"s is
- not a script run.
-
- More interesting examples involve characters with more than one script
- in their Script Extension. Consider the following characters:
-
- U+060C Arabic comma
- U+06D4 Arabic full stop
-
- The first has the Script Extension list Arabic, Hanifi Rohingya, Syr-
- iac, and Thaana; the second has just Arabic and Hanifi Rohingya. Both
- of them could appear in script runs of either Arabic or Hanifi
- Rohingya. The first could also appear in Syriac or Thaana script runs,
- but the second could not.
-
- The Chinese Han script
-
- The Chinese Han script is commonly used in conjunction with other
- scripts for writing certain languages. Japanese uses the Hiragana and
- Katakana scripts together with Han; Korean uses Hangul and Han; Tai-
- wanese Mandarin uses Bopomofo and Han. These three combinations are
- treated as special cases when checking script runs and are, in effect,
- "virtual scripts". Thus, a script run may contain a mixture of Hira-
- gana, Katakana, and Han, or a mixture of Hangul and Han, or a mixture
- of Bopomofo and Han, but not, for example, a mixture of Hangul and
- Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical Stan-
- dard 39 ("Unicode Security Mechanisms", http://uni-
- code.org/reports/tr39/) in allowing such mixtures.
-
- Decimal digits
-
- Unicode contains many sets of 10 decimal digits in different scripts,
- and some scripts (including the Common script) contain more than one
- set. Some of these decimal digits them are visually indistinguishable
- from the common ASCII digits. In addition to the script checking
- described above, if a script run contains any decimal digits, they must
- all come from the same set of 10 adjacent characters.
-
-
VALIDITY OF UTF STRINGS
- When the PCRE2_UTF option is set, the strings passed as patterns and
+ When the PCRE2_UTF option is set, the strings passed as patterns and
subjects are (by default) checked for validity on entry to the relevant
- functions. If an invalid UTF string is passed, an negative error code
- is returned. The code unit offset to the offending character can be
- extracted from the match data block by calling pcre2_get_startchar(),
+ functions. If an invalid UTF string is passed, a negative error code
+ is returned. The code unit offset to the offending character can be
+ extracted from the match data block by calling pcre2_get_startchar(),
which is used for this purpose after a UTF error.
- In some situations, you may already know that your strings are valid,
- and therefore want to skip these checks in order to improve perfor-
- mance, for example in the case of a long subject string that is being
- scanned repeatedly. If you set the PCRE2_NO_UTF_CHECK option at com-
- pile time or at match time, PCRE2 assumes that the pattern or subject
- it is given (respectively) contains only valid UTF code unit sequences.
-
- If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the
- result is usually undefined and your program may crash or loop indefi-
- nitely. There is, however, one mode of matching that can handle invalid
- UTF subject strings. This is matching via the JIT optimization using
- the PCRE2_JIT_INVALID_UTF option when calling pcre2_jit_compile(). For
- details, see the pcre2jit documentation.
-
- Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the check
- for the pattern; it does not also apply to subject strings. If you want
- to disable the check for a subject string you must pass this same
- option to pcre2_match() or pcre2_dfa_match().
-
UTF-16 and UTF-32 strings can indicate their endianness by a special code
known as a byte-order mark (BOM). The PCRE2 functions do not handle
this, expecting strings to be in host byte order.
- Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any
- other processing takes place. In the case of pcre2_match() and
- pcre2_dfa_match() calls with a non-zero starting offset, the check is
- applied only to that part of the subject that could be inspected during
- matching, and there is a check that the starting offset points to the
- first code unit of a character or to the end of the subject. If there
- are no lookbehind assertions in the pattern, the check starts at the
- starting offset. Otherwise, it starts at the length of the longest
- lookbehind before the starting offset, or at the start of the subject
- if there are not that many characters before the starting offset. Note
- that the sequences \b and \B are one-character lookbehinds.
+ A UTF string is checked before any other processing takes place. In the
+ case of pcre2_match() and pcre2_dfa_match() calls with a non-zero
+ starting offset, the check is applied only to that part of the subject
+ that could be inspected during matching, and there is a check that the
+ starting offset points to the first code unit of a character or to the
+ end of the subject. If there are no lookbehind assertions in the pat-
+ tern, the check starts at the starting offset. Otherwise, it starts at
+ the length of the longest lookbehind before the starting offset, or at
+ the start of the subject if there are not that many characters before
+ the starting offset. Note that the sequences \b and \B are one-charac-
+ ter lookbehinds.
In addition to checking the format of the string, there is a check to
ensure that all code points lie in the range U+0 to U+10FFFF, excluding
@@ -11004,10 +10547,25 @@ VALIDITY OF UTF STRINGS
other words, the whole surrogate thing is a fudge for UTF-16 which
unfortunately messes up UTF-8 and UTF-32.)
- Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
- that is given if an escape sequence for an invalid Unicode code point
- is encountered in the pattern. If you want to allow escape sequences
- such as \x{d800} (a surrogate code point) you can set the
+ In some situations, you may already know that your strings are valid,
+ and therefore want to skip these checks in order to improve perfor-
+ mance, for example in the case of a long subject string that is being
+ scanned repeatedly. If you set the PCRE2_NO_UTF_CHECK option at com-
+ pile time or at match time, PCRE2 assumes that the pattern or subject
+ it is given (respectively) contains only valid UTF code unit sequences.
+
+ Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the check
+ for the pattern; it does not also apply to subject strings. If you want
+ to disable the check for a subject string you must pass this option to
+ pcre2_match() or pcre2_dfa_match().
+
+ If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the
+ result is undefined and your program may crash or loop indefinitely.
+
+ Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable
+ the error that is given if an escape sequence for an invalid Unicode
+ code point is encountered in the pattern. If you want to allow escape
+ sequences such as \x{d800} (a surrogate code point) you can set the
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is pos-
sible only in UTF-8 and UTF-32 modes, because these values are not rep-
resentable in UTF-16.
@@ -11022,10 +10580,10 @@ VALIDITY OF UTF STRINGS
PCRE2_ERROR_UTF8_ERR4
PCRE2_ERROR_UTF8_ERR5
- The string ends with a truncated UTF-8 character; the code specifies
- how many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8
- characters to be no longer than 4 bytes, the encoding scheme (origi-
- nally defined by RFC 2279) allows for up to 6 bytes, and this is
+ The string ends with a truncated UTF-8 character; the code specifies
+ how many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8
+ characters to be no longer than 4 bytes, the encoding scheme (origi-
+ nally defined by RFC 2279) allows for up to 6 bytes, and this is
checked first; hence the possibility of 4 or 5 missing bytes.
PCRE2_ERROR_UTF8_ERR6
@@ -11035,24 +10593,24 @@ VALIDITY OF UTF STRINGS
PCRE2_ERROR_UTF8_ERR10
The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of
- the character do not have the binary value 0b10 (that is, either the
+ the character do not have the binary value 0b10 (that is, either the
most significant bit is 0, or the next bit is 1).
PCRE2_ERROR_UTF8_ERR11
PCRE2_ERROR_UTF8_ERR12
- A character that is valid by the RFC 2279 rules is either 5 or 6 bytes
+ A character that is valid by the RFC 2279 rules is either 5 or 6 bytes
long; these code points are excluded by RFC 3629.
PCRE2_ERROR_UTF8_ERR13
- A 4-byte character has a value greater than 0x10fff; these code points
+ A 4-byte character has a value greater than 0x10ffff; these code points
are excluded by RFC 3629.
PCRE2_ERROR_UTF8_ERR14
- A 3-byte character has a value in the range 0xd800 to 0xdfff; this
- range of code points are reserved by RFC 3629 for use with UTF-16, and
+ A 3-byte character has a value in the range 0xd800 to 0xdfff; this
+ range of code points are reserved by RFC 3629 for use with UTF-16, and
so are excluded from UTF-8.
PCRE2_ERROR_UTF8_ERR15
@@ -11061,26 +10619,26 @@ VALIDITY OF UTF STRINGS
PCRE2_ERROR_UTF8_ERR18
PCRE2_ERROR_UTF8_ERR19
- A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes
- for a value that can be represented by fewer bytes, which is invalid.
- For example, the two bytes 0xc0, 0xae give the value 0x2e, whose cor-
+ A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes
+ for a value that can be represented by fewer bytes, which is invalid.
+ For example, the two bytes 0xc0, 0xae give the value 0x2e, whose cor-
rect coding uses just one byte.
PCRE2_ERROR_UTF8_ERR20
The two most significant bits of the first byte of a character have the
- binary value 0b10 (that is, the most significant bit is 1 and the sec-
- ond is 0). Such a byte can only validly occur as the second or subse-
+ binary value 0b10 (that is, the most significant bit is 1 and the sec-
+ ond is 0). Such a byte can only validly occur as the second or subse-
quent byte of a multi-byte character.
PCRE2_ERROR_UTF8_ERR21
- The first byte of a character has the value 0xfe or 0xff. These values
+ The first byte of a character has the value 0xfe or 0xff. These values
can never occur in a valid UTF-8 string.
Errors in UTF-16 strings
- The following negative error codes are given for invalid UTF-16
+ The following negative error codes are given for invalid UTF-16
strings:
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string
@@ -11090,7 +10648,7 @@ VALIDITY OF UTF STRINGS
Errors in UTF-32 strings
- The following negative error codes are given for invalid UTF-32
+ The following negative error codes are given for invalid UTF-32
strings:
PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff)
@@ -11106,8 +10664,8 @@ AUTHOR
REVISION
- Last updated: 06 March 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 02 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
diff --git a/dist2/doc/pcre2_compile.3 b/dist2/doc/pcre2_compile.3
index b23bf46d..a5e8269d 100644
--- a/dist2/doc/pcre2_compile.3
+++ b/dist2/doc/pcre2_compile.3
@@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE 3 "11 February 2019" "PCRE2 10.33"
+.TH PCRE2_COMPILE 3 "16 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -73,13 +73,7 @@ The option bits are:
PCRE2 must be built with Unicode support (the default) in order to use
PCRE2_UTF, PCRE2_UCP and related options.
.P
-Additional options may be set in the compile context via the
-.\" HREF
-\fBpcre2_set_compile_extra_options\fP
-.\"
-function.
-.P
-The yield of this function is a pointer to a private data structure that
+The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected.
.P
There is a complete description of the PCRE2 native API, with more detail on
diff --git a/dist2/doc/pcre2_dfa_match.3 b/dist2/doc/pcre2_dfa_match.3
index 6413cb60..dfc3ae6e 100644
--- a/dist2/doc/pcre2_dfa_match.3
+++ b/dist2/doc/pcre2_dfa_match.3
@@ -1,4 +1,4 @@
-.TH PCRE2_DFA_MATCH 3 "16 October 2018" "PCRE2 10.33"
+.TH PCRE2_DFA_MATCH 3 "26 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -39,8 +39,6 @@ depth limits. The \fIlength\fP and \fIstartoffset\fP values are code units, not
characters. The options are:
.sp
PCRE2_ANCHORED Match only at the first position
- PCRE2_COPY_MATCHED_SUBJECT
- On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
diff --git a/dist2/doc/pcre2_jit_compile.3 b/dist2/doc/pcre2_jit_compile.3
index 23dd2d2d..a11d949d 100644
--- a/dist2/doc/pcre2_jit_compile.3
+++ b/dist2/doc/pcre2_jit_compile.3
@@ -1,4 +1,4 @@
-.TH PCRE2_JIT_COMPILE 3 "06 March 2019" "PCRE2 10.33"
+.TH PCRE2_JIT_COMPILE 3 "21 October 2014" "PCRE2 10.00"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -29,7 +29,6 @@ bits:
PCRE2_JIT_COMPLETE compile code for full matching
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
- PCRE2_JIT_INVALID_UTF compile code to handle invalid UTF
.sp
The yield of the function is 0 for success, or a negative error code otherwise.
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
diff --git a/dist2/doc/pcre2_match.3 b/dist2/doc/pcre2_match.3
index 2be2dd0a..9d15ec9c 100644
--- a/dist2/doc/pcre2_match.3
+++ b/dist2/doc/pcre2_match.3
@@ -1,4 +1,4 @@
-.TH PCRE2_MATCH 3 "16 October 2018" "PCRE2 10.33"
+.TH PCRE2_MATCH 3 "14 November 2017" "PCRE2 10.31"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -43,13 +43,11 @@ A match context is needed only if you want to:
Change the backtracking depth limit
Set custom memory management specifically for the match
.sp
-The \fIlength\fP and \fIstartoffset\fP values are code units, not characters.
-The length may be given as PCRE2_ZERO_TERMINATED for a subject that is
-terminated by a binary zero code unit. The options are:
+The \fIlength\fP and \fIstartoffset\fP values are code
+units, not characters. The length may be given as PCRE2_ZERO_TERMINATED for a
+subject that is terminated by a binary zero code unit. The options are:
.sp
PCRE2_ANCHORED Match only at the first position
- PCRE2_COPY_MATCHED_SUBJECT
- On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
diff --git a/dist2/doc/pcre2_match_data_free.3 b/dist2/doc/pcre2_match_data_free.3
index cebdef90..56ed08bb 100644
--- a/dist2/doc/pcre2_match_data_free.3
+++ b/dist2/doc/pcre2_match_data_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_MATCH_DATA_FREE 3 "16 October 2018" "PCRE2 10.33"
+.TH PCRE2_MATCH_DATA_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -18,10 +18,6 @@ If \fImatch_data\fP is NULL, this function does nothing. Otherwise,
using the memory freeing function from the general context or compiled pattern
with which it was created, or \fBfree()\fP if that was not set.
.P
-If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
-match data block, the copy of the subject that was remembered with the block is
-also freed.
-.P
There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
diff --git a/dist2/doc/pcre2_pattern_info.3 b/dist2/doc/pcre2_pattern_info.3
index edd8989d..01b74a2a 100644
--- a/dist2/doc/pcre2_pattern_info.3
+++ b/dist2/doc/pcre2_pattern_info.3
@@ -1,4 +1,4 @@
-.TH PCRE2_PATTERN_INFO 3 "14 February 2019" "PCRE2 10.33"
+.TH PCRE2_PATTERN_INFO 3 "16 December 2017" "PCRE2 10.31"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -7,8 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.B #include <pcre2.h>
.PP
.nf
-.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
-.B " void *\fIwhere\fP);"
+.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
.fi
.
.SH DESCRIPTION
diff --git a/dist2/doc/pcre2_set_compile_extra_options.3 b/dist2/doc/pcre2_set_compile_extra_options.3
index 764a75e8..79f71cec 100644
--- a/dist2/doc/pcre2_set_compile_extra_options.3
+++ b/dist2/doc/pcre2_set_compile_extra_options.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "11 February 2019" "PCRE2 10.33"
+.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "16 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -8,7 +8,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.PP
.nf
.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
-.B " uint32_t \fIextra_options\fP);"
+.B " uint32_t \fIextra_options\fP);"
.fi
.
.SH DESCRIPTION
@@ -22,12 +22,8 @@ options are:
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{d800} to \ex{dfff}
in UTF-8 and UTF-32 modes
.\" JOIN
- PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and \ex
- handling
-.\" JOIN
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as
a literal following character
- PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \er as \en
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
PCRE2_EXTRA_MATCH_WORD Pattern matches "words"
.sp
diff --git a/dist2/doc/pcre2_set_substitute_callout.3 b/dist2/doc/pcre2_set_substitute_callout.3
deleted file mode 100644
index cdd7ac6a..00000000
--- a/dist2/doc/pcre2_set_substitute_callout.3
+++ /dev/null
@@ -1,31 +0,0 @@
-.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "12 November 2018" "PCRE2 10.33"
-.SH NAME
-PCRE2 - Perl-compatible regular expressions (revised API)
-.SH SYNOPSIS
-.rs
-.sp
-.B #include <pcre2.h>
-.PP
-.nf
-.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
-.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *),"
-.B " void *\fIcallout_data\fP);"
-.fi
-.
-.SH DESCRIPTION
-.rs
-.sp
-This function sets the substitute callout fields in a match context (the first
-argument). The second argument specifies a callout function, and the third
-argument is an opaque data item that is passed to it. The result of this
-function is always zero.
-.P
-There is a complete description of the PCRE2 native API in the
-.\" HREF
-\fBpcre2api\fP
-.\"
-page and a description of the POSIX API in the
-.\" HREF
-\fBpcre2posix\fP
-.\"
-page.
diff --git a/dist2/doc/pcre2_substring_nametable_scan.3 b/dist2/doc/pcre2_substring_nametable_scan.3
index 9ab58cdc..4342f988 100644
--- a/dist2/doc/pcre2_substring_nametable_scan.3
+++ b/dist2/doc/pcre2_substring_nametable_scan.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SUBSTRING_NAMETABLE_SCAN 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2_SUBSTRING_NAMETABLE_SCAN 3 "21 October 2014" "PCRE2 10.00"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -15,8 +15,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.rs
.sp
This convenience function finds, for a compiled pattern, the first and last
-entries for a given name in the table that translates capture group names into
-numbers.
+entries for a given name in the table that translates capturing parenthesis
+names into numbers.
.sp
\fIcode\fP Compiled regular expression
\fIname\fP Name whose entries required
diff --git a/dist2/doc/pcre2api.3 b/dist2/doc/pcre2api.3
index d219466c..ba90c86d 100644
--- a/dist2/doc/pcre2api.3
+++ b/dist2/doc/pcre2api.3
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "14 February 2019" "PCRE2 10.33"
+.TH PCRE2API 3 "07 September 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -123,10 +123,6 @@ document for an overview of all the PCRE2 documentation.
.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *),"
.B " void *\fIcallout_data\fP);"
.sp
-.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
-.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
-.B " void *\fIcallout_data\fP);"
-.sp
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
.B " PCRE2_SIZE \fIvalue\fP);"
.sp
@@ -247,8 +243,7 @@ document for an overview of all the PCRE2 documentation.
.sp
.B const unsigned char *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
.sp
-.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
-.B " void *\fIwhere\fP);"
+.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
.sp
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *),"
@@ -790,7 +785,7 @@ functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP.
.B " uint32_t \fIvalue\fP);"
.fi
.sp
-This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
+This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
using up too much system stack when being compiled. The limit applies to
parentheses of all kinds, not just capturing parentheses.
@@ -852,7 +847,7 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
.B " void *\fIcallout_data\fP);"
.fi
.sp
-This sets up a callout function for PCRE2 to call at specified points
+This sets up a "callout" function for PCRE2 to call at specified points
during a matching operation. Details are given in the
.\" HREF
\fBpcre2callout\fP
@@ -860,20 +855,6 @@ during a matching operation. Details are given in the
documentation.
.sp
.nf
-.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
-.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
-.B " void *\fIcallout_data\fP);"
-.fi
-.sp
-This sets up a callout function for PCRE2 to call after each substitution
-made by \fBpcre2_substitute()\fP. Details are given in the section entitled
-"Creating a new string with substitutions"
-.\" HTML <a href="#substitutions">
-.\" </a>
-below.
-.\"
-.sp
-.nf
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
.B " PCRE2_SIZE \fIvalue\fP);"
.fi
@@ -886,7 +867,7 @@ offset is not found. The \fBpcre2_substitute()\fP function makes no more
substitutions.
.P
For example, if the pattern /abc/ is matched against "123abc" with an offset
-limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be
+limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be
found if the \fIstartoffset\fP argument of \fBpcre2_match()\fP,
\fBpcre2_dfa_match()\fP, or \fBpcre2_substitute()\fP is greater than the offset
limit set in the match context.
@@ -1232,28 +1213,22 @@ are needed. The \fBpcre2_code_copy_with_tables()\fP provides this facility.
Copies of both the code and the tables are made, with the new code pointing to
the new tables. The memory for the new tables is automatically freed when
\fBpcre2_code_free()\fP is called for the new copy of the compiled code. If
-\fBpcre2_code_copy_with_tables()\fP is called with a NULL argument, it returns
+\fBpcre2_code_copy_with_tables()\fP is called with a NULL argument, it returns
NULL.
.P
NOTE: When one of the matching functions is called, pointers to the compiled
pattern and the subject string are set in the match data block so that they can
-be referenced by the substring extraction functions after a successful match.
-After running a match, you must not free a compiled pattern or a subject string
-until after all operations on the
+be referenced by the substring extraction functions. After running a match, you
+must not free a compiled pattern (or a subject string) until after all
+operations on the
.\" HTML <a href="#matchdatablock">
.\" </a>
match data block
.\"
-have taken place, unless, in the case of the subject string, you have used the
-PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
-"Option bits for \fBpcre2_match()\fP"
-.\" HTML <a href="#matchoptions>">
-.\" </a>
-below.
-.\"
+have taken place.
.P
The \fIoptions\fP argument for \fBpcre2_compile()\fP contains various bit
-settings that affect the compilation. It should be zero if none of them are
+settings that affect the compilation. It should be zero if no options are
required. The available options are described below. Some of them (in
particular, those that are compatible with Perl, but some others as well) can
also be set and unset from within the pattern (see the detailed description in
@@ -1268,9 +1243,8 @@ contents of the \fIoptions\fP argument specifies their settings at the start of
compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK
options can be set at the time of matching as well as at compile time.
.P
-Some additional options and less frequently required compile-time parameters
-(for example, the newline setting) can be provided in a compile context (as
-described
+Other, less frequently required compile-time parameters (for example, the
+newline setting) can be provided in a compile context (as described
.\" HTML <a href="#compilecontext">
.\" </a>
above).
@@ -1327,11 +1301,6 @@ This code fragment shows a typical straightforward call to
&erroffset, /* for error offset */
NULL); /* no compile context */
.sp
-.
-.
-.SS "Main compile options"
-.rs
-.sp
The following names for option bits are defined in the \fBpcre2.h\fP header
file:
.sp
@@ -1368,16 +1337,6 @@ hexadecimal digits, in which case the hexadecimal number defines the code point
to match. By default, as in Perl, a hexadecimal number is always expected after
\ex, but it may have zero, one, or two digits (so, for example, \exz matches a
binary zero character followed by z).
-.P
-ECMAscript 6 added additional functionality to \eu. This can be accessed using
-the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
-.\" HTML <a href="#extracompileoptions">
-.\" </a>
-below).
-.\"
-Note that this alternative escape handling applies only to patterns. Neither of
-these options affects the processing of replacement strings passed to
-\fBpcre2_substitute()\fP.
.sp
PCRE2_ALT_CIRCUMFLEX
.sp
@@ -1446,10 +1405,10 @@ independent of the setting of PCRE2_DOTALL.
.sp
PCRE2_DUPNAMES
.sp
-If this bit is set, names used to identify capture groups need not be unique.
-This can be helpful for certain types of pattern when it is known that only one
-instance of the named group can ever be matched. There are more details of
-named capture groups below; see also the
+If this bit is set, names used to identify capturing subpatterns need not be
+unique. This can be helpful for certain types of pattern when it is known that
+only one instance of the named subpattern can ever be matched. There are more
+details of named subpatterns below; see also the
.\" HREF
\fBpcre2pattern\fP
.\"
@@ -1483,11 +1442,11 @@ the end of the subject.
If this bit is set, most white space characters in the pattern are totally
ignored except when escaped or inside a character class. However, white space
is not allowed within sequences such as (?> that introduce various
-parenthesized groups, nor within numerical quantifiers such as {1,3}. Ignorable
-white space is permitted between an item and a following quantifier and between
-a quantifier and a following + that indicates possessiveness. PCRE2_EXTENDED is
-equivalent to Perl's /x option, and it can be changed within a pattern by a
-(?x) option setting.
+parenthesized subpatterns, nor within numerical quantifiers such as {1,3}.
+Ignorable white space is permitted between an item and a following quantifier
+and between a quantifier and a following + that indicates possessiveness.
+PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be changed within
+a pattern by a (?x) option setting.
.P
When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
white space only those characters with code points less than 256 that are
@@ -1564,7 +1523,7 @@ error.
.sp
PCRE2_MATCH_UNSET_BACKREF
.sp
-If this option is set, a backreference to an unset capture group matches an
+If this option is set, a backreference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail).
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl
@@ -1625,7 +1584,7 @@ If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
they acquire numbers in the usual way). This is the same as Perl's /n option.
-Note that, when this option is set, references to capture groups
+Note that, when this option is set, references to capturing groups
(backreferences or recursion/subroutine calls) may only refer to named groups,
though the reference can be by name or by number.
.sp
@@ -1644,7 +1603,7 @@ purposes.
If this option is set, it disables an optimization that is applied when .* is
the first significant item in a top-level branch of a pattern, and all the
other branches also start with .* or with \eA or \eG or ^. The optimization is
-automatically disabled for .* if it is inside an atomic group or a capture
+automatically disabled for .* if it is inside an atomic group or a capturing
group that is the subject of a backreference, or if the pattern contains
(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is
automatically anchored if PCRE2_DOTALL is set for all the .* items and
@@ -1805,8 +1764,9 @@ characters with code points greater than 127.
.SS "Extra compile options"
.rs
.sp
-The option bits that can be set in a compile context by calling the
-\fBpcre2_set_compile_extra_options()\fP function are as follows:
+Unlike the main compile-time options, the extra options are not saved with the
+compiled pattern. The option bits that can be set in a compile context by
+calling the \fBpcre2_set_compile_extra_options()\fP function are as follows:
.sp
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
.sp
@@ -1830,14 +1790,6 @@ point values in UTF-8 and UTF-32 patterns no longer provoke errors and are
incorporated in the compiled pattern. However, they can only match subject
characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
.sp
- PCRE2_EXTRA_ALT_BSUX
-.sp
-The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in
-the way that ECMAscript (aka JavaScript) does. Additional functionality was
-defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
-PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal
-character code, where hhh.. is any number of hexadecimal digits.
-.sp
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
.sp
This is a dangerous option. Use with care. By default, an unrecognized escape
@@ -1849,22 +1801,11 @@ Perl's warning switch is enabled. However, a malformed octal number after \eo{
always causes an error in Perl.
.P
If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
-\fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are
+\fBpcre2_compile()\fP, all unrecognized or erroneous escape sequences are
treated as single-character escapes. For example, \ej is a literal "j" and
\ex{2z} is treated as the literal string "x{2z}". Setting this option means
-that typos in patterns may go undetected and have unexpected results. Also note
-that a sequence such as [\eN{] is interpreted as a malformed attempt at
-[\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an
-unqualified \eN is a valid escape sequence but is not supported in a character
-class. To reiterate: this is a dangerous option. Use with great care.
-.sp
- PCRE2_EXTRA_ESCAPED_CR_IS_LF
-.sp
-There are some legacy applications where the escape sequence \er in a pattern
-is expected to match a newline. If this option is set, \er in a pattern is
-converted to \en so that it matches a LF (linefeed) instead of a CR (carriage
-return) character. The option does not affect a literal CR in the pattern, nor
-does it affect CR specified as an explicit code point such as \ex{0D}.
+that typos in patterns may go undetected and have unexpected results. This is a
+dangerous option. Use with care.
.sp
PCRE2_EXTRA_MATCH_LINE
.sp
@@ -2049,7 +1990,7 @@ following are true:
.sp
.* is not in an atomic group
.\" JOIN
- .* is not in a capture group that is the subject
+ .* is not in a capturing group that is the subject
of a backreference
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
@@ -2061,12 +2002,12 @@ options returned for PCRE2_INFO_ALLOPTIONS.
PCRE2_INFO_BACKREFMAX
.sp
Return the number of the highest backreference in the pattern. The third
-argument should point to an \fBuint32_t\fP variable. Named capture groups
-acquire numbers as well as names, and these count towards the highest
-backreference. Backreferences such as \e4 or \eg{12} match the captured
-characters of the given group, but in addition, the check that a capture
-group is set in a conditional group such as (?(3)a|b) is also a backreference.
-Zero is returned if there are no backreferences.
+argument should point to an \fBuint32_t\fP variable. Named subpatterns acquire
+numbers as well as names, and these count towards the highest backreference.
+Backreferences such as \e4 or \eg{12} match the captured characters of the
+given group, but in addition, the check that a capturing group is set in a
+conditional subpattern such as (?(3)a|b) is also a backreference. Zero is
+returned if there are no backreferences.
.sp
PCRE2_INFO_BSR
.sp
@@ -2077,9 +2018,9 @@ that \eR matches only CR, LF, or CRLF.
.sp
PCRE2_INFO_CAPTURECOUNT
.sp
-Return the highest capture group number in the pattern. In patterns where (?|
-is not used, this is also the total number of capture groups. The third
-argument should point to an \fBuint32_t\fP variable.
+Return the highest capturing subpattern number in the pattern. In patterns
+where (?| is not used, this is also the total number of capturing subpatterns.
+The third argument should point to an \fBuint32_t\fP variable.
.sp
PCRE2_INFO_DEPTHLIMIT
.sp
@@ -2127,7 +2068,7 @@ Return the size (in bytes) of the data frames that are used to remember
backtracking positions when the pattern is processed by \fBpcre2_match()\fP
without the use of JIT. The third argument should point to a \fBsize_t\fP
variable. The frame size depends on the number of capturing parentheses in the
-pattern. Each additional capture group adds two PCRE2_SIZE variables.
+pattern. Each additional capturing group adds two PCRE2_SIZE variables.
.sp
PCRE2_INFO_HASBACKSLASHC
.sp
@@ -2248,11 +2189,11 @@ library, the pointer points to 32-bit code units, the first of which contains
the parenthesis number. The rest of the entry is the corresponding name, zero
terminated.
.P
-The names are in alphabetical order. If (?| is used to create multiple capture
-groups with the same number, as described in the
-.\" HTML <a href="pcre2pattern.html#dupgroupnumber">
+The names are in alphabetical order. If (?| is used to create multiple groups
+with the same number, as described in the
+.\" HTML <a href="pcre2pattern.html#dupsubpatternnumber">
.\" </a>
-section on duplicate group numbers
+section on duplicate subpattern numbers
.\"
in the
.\" HREF
@@ -2261,11 +2202,11 @@ in the
page, the groups may be given the same name, but there is only one entry in the
table. Different names for groups of the same number are not permitted.
.P
-Duplicate names for capture groups with different numbers are permitted, but
-only if PCRE2_DUPNAMES is set. They appear in the table in the order in which
-they were found in the pattern. In the absence of (?| this is the order of
+Duplicate names for subpatterns with different numbers are permitted, but only
+if PCRE2_DUPNAMES is set. They appear in the table in the order in which they
+were found in the pattern. In the absence of (?| this is the order of
increasing number; when (?| is used this is not necessarily the case because
-later capture groups may have lower numbers.
+later subpatterns may have lower numbers.
.P
As a simple example of the name/number table, consider the following pattern
after compilation by the 8-bit library (assume PCRE2_EXTENDED is set, so white
@@ -2275,16 +2216,16 @@ space - including newlines - is ignored):
(?<date> (?<year>(\ed\ed)?\ed\ed) -
(?<month>\ed\ed) - (?<day>\ed\ed) )
.sp
-There are four named capture groups, so the table has four entries, and each
-entry in the table is eight bytes long. The table is as follows, with
-non-printing bytes shows in hexadecimal, and undefined bytes shown as ??:
+There are four named subpatterns, so the table has four entries, and each entry
+in the table is eight bytes long. The table is as follows, with non-printing
+bytes shown in hexadecimal, and undefined bytes shown as ??:
.sp
00 01 d a t e 00 ??
00 05 d a y 00 ?? ??
00 04 m o n t h 00
00 02 y e a r 00 ??
.sp
-When writing code to extract data from named capture groups using the
+When writing code to extract data from named subpatterns using the
name-to-number map, remember that the length of the entries is likely to be
different for each compiled pattern.
.sp
@@ -2421,16 +2362,9 @@ on the error, and is detailed below.
.P
When one of the matching functions is called, pointers to the compiled pattern
and the subject string are set in the match data block so that they can be
-referenced by the extraction functions after a successful match. After running
-a match, you must not free a compiled pattern or a subject string until after
-all operations on the match data block (for that match) have taken place,
-unless, in the case of the subject string, you have used the
-PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
-"Option bits for \fBpcre2_match()\fP"
-.\" HTML <a href="#matchoptions>">
-.\" </a>
-below.
-.\"
+referenced by the extraction functions. After running a match, you must not
+free a compiled pattern or a subject string until after all operations on the
+match data block (for that match) have taken place.
.P
When a match data block itself is no longer needed, it should be freed by
calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL
@@ -2547,10 +2481,10 @@ the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA.
.rs
.sp
The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be
-zero. The only bits that may be set are PCRE2_ANCHORED,
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
-PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
-PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
+zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
+Their action is described below.
.P
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the
@@ -2565,22 +2499,6 @@ to be anchored by virtue of its contents, it cannot be made unachored at
matching time. Note that setting the option at match time disables JIT
matching.
.sp
- PCRE2_COPY_MATCHED_SUBJECT
-.sp
-By default, a pointer to the subject is remembered in the match data block so
-that, after a successful match, it can be referenced by the substring
-extraction functions. This means that the subject's memory must not be freed
-until all such operations are complete. For some applications where the
-lifetime of the subject string is not guaranteed, it may be necessary to make a
-copy of the subject string, but it is wasteful to do this unless the match is
-successful. After a successful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the
-subject is copied and the new pointer is remembered in the match data block
-instead of the original subject pointer. The memory allocator that was used for
-the match block itself is used. The copy is automatically freed when
-\fBpcre2_match_data_free()\fP is called to free the match data block. It is also
-automatically freed if the match data block is re-used for another match
-operation.
-.sp
PCRE2_ENDANCHORED
.sp
If the PCRE2_ENDANCHORED option is set, any string that \fBpcre2_match()\fP
@@ -2764,12 +2682,12 @@ valid newline sequence and explicit \er or \en escapes appear in the pattern.
In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
-book, this is called "capturing" in what follows, and the phrase "capture
-group" (Perl terminology) is used for a fragment of a pattern that picks out a
-substring. PCRE2 supports several other kinds of parenthesized group that do
-not cause substrings to be captured. The \fBpcre2_pattern_info()\fP function
-can be used to find out how many capture groups there are in a compiled
-pattern.
+book, this is called "capturing" in what follows, and the phrase "capturing
+subpattern" or "capturing group" is used for a fragment of a pattern that picks
+out a substring. PCRE2 supports several other kinds of parenthesized subpattern
+that do not cause substrings to be captured. The \fBpcre2_pattern_info()\fP
+function can be used to find out how many capturing subpatterns there are in a
+compiled pattern.
.P
You can use auxiliary functions for accessing captured substrings
.\" HTML <a href="#extractbynumber">
@@ -2822,28 +2740,30 @@ reported start of a successful match can be greater than the end of the match.
For example, if the pattern (?=ab\eK) is matched against "ab", the start and
end offset values for the match are 2 and 0.
.P
-If a capture group is matched repeatedly within a single match operation, it is
-the last portion of the subject that it matched that is returned.
+If a capturing subpattern is matched repeatedly within a single match
+operation, it is the last portion of the subject that it matched that is
+returned.
.P
If the ovector is too small to hold all the captured substring offsets, as much
as possible is filled in, and the function returns a value of zero. If captured
substrings are not of interest, \fBpcre2_match()\fP may be called with a match
data block whose ovector is of minimum length (that is, one pair).
.P
-It is possible for capture group number \fIn+1\fP to match some part of the
-subject when group \fIn\fP has not been used at all. For example, if the string
-"abc" is matched against the pattern (a|(z))(bc) the return from the function
-is 4, and groups 1 and 3 are matched, but 2 is not. When this happens, both
-values in the offset pairs corresponding to unused groups are set to
+It is possible for capturing subpattern number \fIn+1\fP to match some part of
+the subject when subpattern \fIn\fP has not been used at all. For example, if
+the string "abc" is matched against the pattern (a|(z))(bc) the return from the
+function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
+happens, both values in the offset pairs corresponding to unused subpatterns
+are set to PCRE2_UNSET.
+.P
+Offset values that correspond to unused subpatterns at the end of the
+expression are also set to PCRE2_UNSET. For example, if the string "abc" is
+matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched.
+The return from the function is 2, because the highest used capturing
+subpattern number is 1. The offsets for the second and third capturing
+subpatterns (assuming the vector is large enough, of course) are set to
PCRE2_UNSET.
.P
-Offset values that correspond to unused groups at the end of the expression are
-also set to PCRE2_UNSET. For example, if the string "abc" is matched against
-the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the
-function is 2, because the highest used capture group number is 1. The offsets
-for the second and third capture groups (assuming the vector is large
-enough, of course) are set to PCRE2_UNSET.
-.P
Elements in the ovector that do not correspond to capturing parentheses in the
pattern are never changed. That is, if a pattern contains \fIn\fP capturing
parentheses, no more than \fIovector[0]\fP to \fIovector[2n+1]\fP are set by
@@ -2867,22 +2787,21 @@ appropriate circumstances. If they are called at other times, the result is
undefined.
.P
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
-to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
-\fBpcre2_get_mark()\fP can be called to access this name, which can be
-specified in the pattern by any of the backtracking control verbs, not just
-(*MARK). The same function applies to all the verbs. It returns a pointer to
-the zero-terminated name, which is within the compiled pattern. If no name is
+to match (PCRE2_ERROR_NOMATCH), a (*MARK), (*PRUNE), or (*THEN) name may be
+available. The function \fBpcre2_get_mark()\fP can be called to access this
+name. The same function applies to all three verbs. It returns a pointer to the
+zero-terminated name, which is within the compiled pattern. If no name is
available, NULL is returned. The length of the name (excluding the terminating
zero) is stored in the code unit that precedes the name. You should use this
length instead of relying on the terminating zero if the name might contain a
binary zero.
.P
-After a successful match, the name that is returned is the last mark name
-encountered on the matching path through the pattern. Instances of backtracking
-verbs without names do not count. Thus, for example, if the matching path
-contains (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a
-partial match, the last encountered name is returned. For example, consider
-this pattern:
+After a successful match, the name that is returned is the last (*MARK),
+(*PRUNE), or (*THEN) name encountered on the matching path through the pattern.
+Instances of (*PRUNE) and (*THEN) without names are ignored. Thus, for example,
+if the matching path contains (*MARK:A)(*PRUNE), the name "A" is returned.
+After a "no match" or a partial match, the last encountered name is returned.
+For example, consider this pattern:
.sp
^(*MARK:A)((*MARK:B)a|b)c
.sp
@@ -2896,7 +2815,7 @@ is removed from the pattern above, there is an initial check for the presence
of "c" in the subject before running the matching engine. This check fails for
"bx", causing a match failure without seeing any marks. You can disable the
start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for
-\fBpcre2_compile()\fP or by starting the pattern with (*NO_START_OPT).
+\fBpcre2_compile()\fP or starting the pattern with (*NO_START_OPT).
.P
After a successful match, a partial match, or one of the invalid UTF errors
(for example, PCRE2_ERROR_UTF8_ERR5), \fBpcre2_get_startchar()\fP can be
@@ -3016,8 +2935,7 @@ The backtracking match limit was reached.
If a pattern contains many nested backtracking points, heap memory is used to
remember them. This error is given when the memory allocation function (default
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+if the amount of memory needed exceeds the heap limit.
.sp
PCRE2_ERROR_NULL
.sp
@@ -3028,11 +2946,11 @@ as NULL.
.sp
This error is returned when \fBpcre2_match()\fP detects a recursion loop within
the pattern. Specifically, it means that either the whole pattern or a
-capture group has been called recursively for the second time at the same
-position in the subject string. Some simple patterns that might do this are
-detected and faulted at compile time, but more complicated cases, in particular
-mutual recursions between two different groups, cannot be detected until
-matching is attempted.
+subpattern has been called recursively for the second time at the same position
+in the subject string. Some simple patterns that might do this are detected and
+faulted at compile time, but more complicated cases, in particular mutual
+recursions between two different subpatterns, cannot be detected until matching
+is attempted.
.
.
.\" HTML <a name="geterrormessage"></a>
@@ -3112,7 +3030,7 @@ The \fBpcre2_substring_copy_bynumber()\fP function copies a captured substring
into a supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it
into new memory, obtained using the same memory allocation function that was
used for the match data block. The first two arguments of these functions are a
-pointer to the match data block and a capture group number.
+pointer to the match data block and a capturing group number.
.P
The final arguments of \fBpcre2_substring_copy_bynumber()\fP are a pointer to
the buffer and a pointer to a variable that contains its length in code units.
@@ -3184,9 +3102,9 @@ could not be obtained. When the list is no longer needed, it should be freed by
calling \fBpcre2_substring_list_free()\fP.
.P
If this function encounters a substring that is unset, which can happen when
-capture group number \fIn+1\fP matches some part of the subject, but group
-\fIn\fP has not been used at all, it returns an empty string. This can be
-distinguished from a genuine zero-length substring by inspecting the
+capturing subpattern number \fIn+1\fP matches some part of the subject, but
+subpattern \fIn\fP has not been used at all, it returns an empty string. This
+can be distinguished from a genuine zero-length substring by inspecting the
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
substrings, or by calling \fBpcre2_substring_length_bynumber()\fP.
.
@@ -3216,20 +3134,20 @@ For example, for this pattern:
.sp
(a+)b(?<xxx>\ed+)...
.sp
-the number of the capture group called "xxx" is 2. If the name is known to be
+the number of the subpattern called "xxx" is 2. If the name is known to be
unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
calling \fBpcre2_substring_number_from_name()\fP. The first argument is the
compiled pattern, and the second is the name. The yield of the function is the
-group number, PCRE2_ERROR_NOSUBSTRING if there is no group with that name, or
-PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one group with that name.
-Given the number, you can extract the substring directly from the ovector, or
-use one of the "bynumber" functions described above.
+subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
+name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
+that name. Given the number, you can extract the substring directly from the
+ovector, or use one of the "bynumber" functions described above.
.P
For convenience, there are also "byname" functions that correspond to the
"bynumber" functions, the only difference being that the second argument is a
name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate
names, these functions scan all the groups with the given name, and return the
-captured substring from the first named group that is set.
+first named string that is set.
.P
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
returned. If all groups with the name have numbers that are greater than the
@@ -3238,22 +3156,21 @@ is at least one group with a slot in the ovector, but no group is found to be
set, PCRE2_ERROR_UNSET is returned.
.P
\fBWarning:\fP If the pattern uses the (?| feature to set up multiple
-capture groups with the same number, as described in the
-.\" HTML <a href="pcre2pattern.html#dupgroupnumber">
+subpatterns with the same number, as described in the
+.\" HTML <a href="pcre2pattern.html#dupsubpatternnumber">
.\" </a>
-section on duplicate group numbers
+section on duplicate subpattern numbers
.\"
in the
.\" HREF
\fBpcre2pattern\fP
.\"
-page, you cannot use names to distinguish the different capture groups, because
+page, you cannot use names to distinguish the different subpatterns, because
names are not included in the compiled code. The matching process uses only
-numbers. For this reason, the use of different names for groups with the
+numbers. For this reason, the use of different names for subpatterns of the
same number causes an error at compile time.
.
.
-.\" HTML <a name="substitutions"></a>
.SH "CREATING A NEW STRING WITH SUBSTITUTIONS"
.rs
.sp
@@ -3262,22 +3179,19 @@ same number causes an error at compile time.
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
-.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
+.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
.fi
.P
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
-string in \fIoutputbuffer\fP, replacing one or more parts that were matched
-with the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP.
-This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
-The default is to perform just one replacement, but there is an option that
-requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
-.P
-Matches in which a \eK item in a lookahead in the pattern causes the match to
-end before it starts are not supported, and give rise to an error return. For
-global replacements, matches in which \eK in a lookbehind causes the match to
-start earlier than the point that was reached in the previous iteration are
-also not supported.
+string in \fIoutputbuffer\fP, replacing the part that was matched with the
+\fIreplacement\fP string, whose length is supplied in \fIrlength\fP. This can
+be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
+which a \eK item in a lookahead in the pattern causes the match to end before
+it starts are not supported, and give rise to an error return. For global
+replacements, matches in which \eK in a lookbehind causes the match to start
+earlier than the point that was reached in the previous iteration are also not
+supported.
.P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not
@@ -3287,9 +3201,9 @@ functions from the match context, if provided, or else those that were used to
allocate memory for the compiled code.
.P
If an external \fImatch_data\fP block is provided, its contents afterwards
-are those set by the final call to \fBpcre2_match()\fP. For global changes,
-this will have ended in a matching error. The contents of the ovector within
-the match data block may or may not have been changed.
+are those set by the final call to \fBpcre2_match()\fP, which will have
+ended in a matching error. The contents of the ovector within the match data
+block may or may not have been changed.
.P
The \fIoutlengthptr\fP argument must point to a variable that contains the
length, in code units, of the output buffer. If the function is successful, the
@@ -3310,12 +3224,12 @@ length is in code units, not bytes.
In the replacement string, which is interpreted as a UTF string in UTF mode,
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
dollar character is an escape character that can specify the insertion of
-characters from capture groups or names from (*MARK) or other control verbs
-in the pattern. The following forms are always recognized:
+characters from capturing groups or (*MARK), (*PRUNE), or (*THEN) items in the
+pattern. The following forms are always recognized:
.sp
$$ insert a dollar character
$<n> or ${<n>} insert the contents of group <n>
- $*MARK or ${*MARK} insert a control verb name
+ $*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
.sp
Either a group number or a group name can be given for <n>. Curly brackets are
required only if the following character would be interpreted as part of the
@@ -3323,11 +3237,11 @@ number or name. The number may be zero to include the entire matched string.
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
string "+$1$0$1+", the result is "=+babcb+=".
.P
-$*MARK inserts the name from the last encountered backtracking control verb on
-the matching path that has a name. (*MARK) must always include a name, but the
-other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name
-inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B". This
-facility can be used to perform simple simultaneous substitutions, as this
+$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or (*THEN)
+on the matching path that has a name. (*MARK) must always include a name, but
+(*PRUNE) and (*THEN) need not. For example, in the case of (*MARK:A)(*PRUNE)
+the name inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
+This facility can be used to perform simple simultaneous substitutions, as this
\fBpcre2test\fP example shows:
.sp
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
@@ -3373,12 +3287,12 @@ operation is carried out twice. Depending on the application, it may be more
efficient to allocate a large buffer and free the excess afterwards, instead of
using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
.P
-PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups that do
not appear in the pattern to be treated as unset groups. This option should be
used with care, because it means that a typo in a group name or number no
longer causes the PCRE2_ERROR_NOSUBSTRING error.
.P
-PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including unknown
+PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including unknown
groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty
strings when inserted as described above. If this option is not set, an attempt
to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does
@@ -3403,16 +3317,14 @@ terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
\eu and \el force the next character (if it is a letter) to upper or lower
case, respectively, and then the state automatically reverts to no case
forcing. Case forcing applies to all inserted characters, including those from
-capture groups and letters within \eQ...\eE quoted sequences.
+captured groups and letters within \eQ...\eE quoted sequences.
.P
Note that case forcing sequences such as \eU...\eE do not nest. For example,
the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
-effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
-not apply to replacement strings.
+effect.
.P
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
-flexibility to capture group substitution. The syntax is similar to that used
-by Bash:
+flexibility to group substitution. The syntax is similar to that used by Bash:
.sp
${<n>:-<string>}
${<n>:+<string1>:<string2>}
@@ -3439,9 +3351,9 @@ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
groups in the extended syntax forms to be treated as unset.
.P
-If successful, \fBpcre2_substitute()\fP returns the number of successful
-matches. This may be zero if no matches were found, and is never greater than 1
-unless PCRE2_SUBSTITUTE_GLOBAL is set.
+If successful, \fBpcre2_substitute()\fP returns the number of replacements that
+were made. This may be zero if no matches were found, and is never greater than
+1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
.P
In the event of an error, a negative error code is returned. Except for
PCRE2_ERROR_NOMATCH (which is never returned), errors from \fBpcre2_match()\fP
@@ -3476,65 +3388,7 @@ above).
.\"
.
.
-.SS "Substitution callouts"
-.rs
-.sp
-.nf
-.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
-.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
-.B " void *\fIcallout_data\fP);"
-.fi
-.sp
-The \fBpcre2_set_substitute_callout()\fP function can be used to specify a
-callout function for \fBpcre2_substitute()\fP. This information is passed in
-a match context. The callout function is called after each substitution has
-been processed, but it can cause the replacement not to happen. The callout
-function is not called for simulated substitutions that happen as a result of
-the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
-.P
-The first argument of the callout function is a pointer to a substitute callout
-block structure, which contains the following fields, not necessarily in this
-order:
-.sp
- uint32_t \fIversion\fP;
- uint32_t \fIsubscount\fP;
- PCRE2_SPTR \fIinput\fP;
- PCRE2_SPTR \fIoutput\fP;
- PCRE2_SIZE \fI*ovector\fP;
- uint32_t \fIoveccount\fP;
- PCRE2_SIZE \fIoutput_offsets[2]\fP;
-.sp
-The \fIversion\fP field contains the version number of the block format. The
-current version is 0. The version number will increase in future if more fields
-are added, but the intention is never to remove any of the existing fields.
-.P
-The \fIsubscount\fP field is the number of the current match. It is 1 for the
-first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP
-pointers are copies of the values passed to \fBpcre2_substitute()\fP.
-.P
-The \fIovector\fP field points to the ovector, which contains the result of the
-most recent match. The \fIoveccount\fP field contains the number of pairs that
-are set in the ovector, and is always greater than zero.
-.P
-The \fIoutput_offsets\fP vector contains the offsets of the replacement in the
-output string. This has already been processed for dollar and (if requested)
-backslash substitutions as described above.
-.P
-The second argument of the callout function is the value passed as
-\fIcallout_data\fP when the function was registered. The value returned by the
-callout function is interpreted as follows:
-.P
-If the value is zero, the replacement is accepted, and, if
-PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next
-match. If the value is not zero, the current replacement is not accepted. If
-the value is greater than zero, processing continues when
-PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero or
-PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the
-output and the call to \fBpcre2_substitute()\fP exits, returning the number of
-matches so far.
-.
-.
-.SH "DUPLICATE CAPTURE GROUP NAMES"
+.SH "DUPLICATE SUBPATTERN NAMES"
.rs
.sp
.nf
@@ -3542,14 +3396,13 @@ matches so far.
.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);"
.fi
.P
-When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture
-groups are not required to be unique. Duplicate names are always allowed for
-groups with the same number, created by using the (?| feature. Indeed, if such
-groups are named, they are required to use the same names.
+When a pattern is compiled with the PCRE2_DUPNAMES option, names for
+subpatterns are not required to be unique. Duplicate names are always allowed
+for subpatterns with the same number, created by using the (?| feature. Indeed,
+if such subpatterns are named, they are required to use the same names.
.P
-Normally, patterns that use duplicate names are such that in any one match,
-only one of each set of identically-named groups participates. An example is
-shown in the
+Normally, patterns with duplicate names are such that in any one match, only
+one of the named subpatterns participates. An example is shown in the
.\" HREF
\fBpcre2pattern\fP
.\"
@@ -3659,12 +3512,11 @@ Here is an example of a simple call to \fBpcre2_dfa_match()\fP:
.rs
.sp
The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must
-be zero. The only bits that may be set are PCRE2_ANCHORED,
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
-PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD,
-PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last
-four of these are exactly the same as for \fBpcre2_match()\fP, so their
-description is not repeated here.
+be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST,
+and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as
+for \fBpcre2_match()\fP, so their description is not repeated here.
.sp
PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT
@@ -3730,8 +3582,9 @@ the three matched strings are
On success, the yield of the function is a number greater than zero, which is
the number of matched substrings. The offsets of the substrings are returned in
the ovector, and can be extracted by number in the same way as for
-\fBpcre2_match()\fP, but the numbers bear no relation to any capture groups
-that may exist in the pattern, because DFA matching does not support capturing.
+\fBpcre2_match()\fP, but the numbers bear no relation to any capturing groups
+that may exist in the pattern, because DFA matching does not support group
+capture.
.P
Calls to the convenience functions that extract substrings by name
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
@@ -3773,7 +3626,7 @@ a backreference.
.sp
This return is given if \fBpcre2_dfa_match()\fP encounters a condition item
that uses a backreference for the condition, or a test for recursion in a
-specific capture group. These are not supported.
+specific group. These are not supported.
.sp
PCRE2_ERROR_DFA_WSSIZE
.sp
@@ -3782,9 +3635,9 @@ This return is given if \fBpcre2_dfa_match()\fP runs out of space in the
.sp
PCRE2_ERROR_DFA_RECURSE
.sp
-When a recursion or subroutine call is processed, the matching function calls
-itself recursively, using private memory for the ovector and \fIworkspace\fP.
-This error is given if the internal ovector is not large enough. This should be
+When a recursive subpattern is processed, the matching function calls itself
+recursively, using private memory for the ovector and \fIworkspace\fP. This
+error is given if the internal ovector is not large enough. This should be
extremely rare, as a vector of size 1000 is used.
.sp
PCRE2_ERROR_DFA_BADRESTART
@@ -3817,6 +3670,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 14 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 07 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2build.3 b/dist2/doc/pcre2build.3
index f1d28f8b..540df789 100644
--- a/dist2/doc/pcre2build.3
+++ b/dist2/doc/pcre2build.3
@@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "03 March 2019" "PCRE2 10.33"
+.TH PCRE2BUILD 3 "26 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.
@@ -373,17 +373,14 @@ environment.
.SH "PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS"
.rs
.sp
-By default \fBpcre2grep\fP supports the use of callouts with string arguments
-within the patterns it is matching. There are two kinds: one that generates
-output using local code, and another that calls an external program or script.
-If --disable-pcre2grep-callout-fork is added to the \fBconfigure\fP command,
-only the first kind of callout is supported; if --disable-pcre2grep-callout is
-used, all callouts are completely ignored. For more details of \fBpcre2grep\fP
-callouts, see the
+By default, on non-Windows systems, \fBpcre2grep\fP supports the use of
+callouts with string arguments within the patterns it is matching, in order to
+run external scripts. For details, see the
.\" HREF
\fBpcre2grep\fP
.\"
-documentation.
+documentation. This support can be disabled by adding
+--disable-pcre2grep-callout to the \fBconfigure\fP command.
.
.
.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT"
@@ -536,22 +533,6 @@ information about code coverage, see the \fBgcov\fP and \fBlcov\fP
documentation.
.
.
-.SH "DISABLING THE Z AND T FORMATTING MODIFIERS"
-.rs
-.sp
-The C99 standard defines formatting modifiers z and t for size_t and
-ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-defined and has a value greater than or equal to 199901L (indicating C99).
-However, there is at least one environment that claims to be C99 but does not
-support these modifiers. If
-.sp
- --disable-percent-zt
-.sp
-is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
-%lu is used, with a cast for size_t values.
-.
-.
.SH "SUPPORT FOR FUZZERS"
.rs
.sp
@@ -610,6 +591,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 03 March 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 26 April 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2callout.3 b/dist2/doc/pcre2callout.3
index adb411b5..c815c722 100644
--- a/dist2/doc/pcre2callout.3
+++ b/dist2/doc/pcre2callout.3
@@ -1,4 +1,4 @@
-.TH PCRE2CALLOUT 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2CALLOUT 3 "26 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -27,15 +27,6 @@ a match context (see \fBpcre2_set_callout()\fP in the
.\"
documentation).
.P
-When using the \fBpcre2_substitute()\fP function, an additional callout feature
-is available. This does a callout after each change to the subject string and
-is described in the
-.\" HREF
-\fBpcre2api\fP
-.\"
-documentation; the rest of this document is concerned with callouts during
-pattern matching.
-.P
Within a regular expression, (?C<arg>) indicates a point at which the external
function is to be called. Different callout points can be identified by putting
a number less than 256 after the letter C. The default value is zero.
@@ -137,7 +128,7 @@ start only after an internal newline or at the beginning of the subject, and
branch, automatic anchoring occurs if all branches are anchorable.
.P
This optimization is disabled, however, if .* is in an atomic group or if there
-is a backreference to the capture group in which it appears. It is also
+is a backreference to the capturing group in which it appears. It is also
disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of
callouts does not affect it.
.P
@@ -331,8 +322,8 @@ callout before an assertion such as (?=ab) the length is 3. For an
alternation bar or a closing parenthesis, the length is one, unless a closing
parenthesis is followed by a quantifier, in which case its length is included.
(This changed in release 10.23. In earlier releases, before an opening
-parenthesis the length was that of the entire group, and before an alternation
-bar or a closing parenthesis the length was zero.)
+parenthesis the length was that of the entire subpattern, and before an
+alternation bar or a closing parenthesis the length was zero.)
.P
The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to
help in distinguishing between different automatic callouts, which all have the
@@ -452,6 +443,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 03 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 26 April 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2compat.3 b/dist2/doc/pcre2compat.3
index 39ccc2ea..6e448f6c 100644
--- a/dist2/doc/pcre2compat.3
+++ b/dist2/doc/pcre2compat.3
@@ -1,4 +1,4 @@
-.TH PCRE2COMPAT 3 "12 February 2019" "PCRE2 10.33"
+.TH PCRE2COMPAT 3 "28 July 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
@@ -23,9 +23,10 @@ character is not "a" three times (in principle; PCRE2 optimizes this to run the
assertion just once). Perl allows some repeat quantifiers on other assertions,
for example, \eb* (but not \eb{3}), but these do not seem to have any use.
.P
-3. Capture groups that occur inside negative lookaround assertions are counted,
-but their entries in the offsets vector are set only when a negative assertion
-is a condition that has a matching branch (that is, the condition is false).
+3. Capturing subpatterns that occur inside negative lookaround assertions are
+counted, but their entries in the offsets vector are set only when a negative
+assertion is a condition that has a matching branch (that is, the condition is
+false).
.P
4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
\eU, and \eN when followed by a character name. \eN on its own, matching a
@@ -33,9 +34,8 @@ non-newline character, and \eN{U+dd..}, matching a Unicode code point, are
supported. The escapes that modify the case of following letters are
implemented by Perl's general string-handling and are not part of its pattern
matching engine. If any of these are encountered by PCRE2, an error is
-generated by default. However, if either of the PCRE2_ALT_BSUX or
-PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript
-interprets them.
+generated by default. However, if the PCRE2_ALT_BSUX option is set, \eU and \eu
+are interpreted as ECMAScript interprets them.
.P
5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
built with Unicode support (the default). The properties that can be tested
@@ -79,13 +79,13 @@ documentation for details.
to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
into subroutine calls is now supported, as in Perl.
.P
-9. If any of the backtracking control verbs are used in a group that is called
-as a subroutine (whether or not recursively), their effect is confined to that
-group; it does not extend to the surrounding pattern. This is not always the
-case in Perl. In particular, if (*THEN) is present in a group that is called as
-a subroutine, its action is limited to that group, even if the group does not
-contain any | characters. Note that such groups are processed as anchored
-at the point where they are tested.
+9. If any of the backtracking control verbs are used in a subpattern that is
+called as a subroutine (whether or not recursively), their effect is confined
+to that subpattern; it does not extend to the surrounding pattern. This is not
+always the case in Perl. In particular, if (*THEN) is present in a group that
+is called as a subroutine, its action is limited to that group, even if the
+group does not contain any | characters. Note that such subpatterns are
+processed as anchored at the point where they are tested.
.P
10. If a pattern contains more than one backtracking control verb, the first
one that is backtracked onto acts. For example, in the pattern
@@ -101,20 +101,21 @@ strings when part of a pattern is repeated. For example, matching "aba" against
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
"b".
.P
-13. PCRE2's handling of duplicate capture group numbers and names is not as
-general as Perl's. This is a consequence of the fact the PCRE2 works internally
-just with numbers, using an external table to translate between numbers and
-names. In particular, a pattern such as (?|(?<a>A)|(?<b>B), where the two
-capture groups have the same number but different names, is not supported, and
-causes an error at compile time. If it were allowed, it would not be possible
-to distinguish which group matched, because both names map to capture group
-number 1. To avoid this confusing situation, an error is given at compile time.
+13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
+names is not as general as Perl's. This is a consequence of the fact that PCRE2
+works internally just with numbers, using an external table to translate
+between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
+where the two capturing parentheses have the same number but different names,
+is not supported, and causes an error at compile time. If it were allowed, it
+would not be possible to distinguish which parentheses matched, because both
+names map to capturing subpattern number 1. To avoid this confusing situation,
+an error is given at compile time.
.P
14. Perl used to recognize comments in some places that PCRE2 does not, for
-example, between the ( and ? at the start of a group. If the /x modifier is
-set, Perl allowed white space between ( and ? though the latest Perls give an
-error (for a while it was just deprecated). There may still be some cases where
-Perl behaves differently.
+example, between the ( and ? at the start of a subpattern. If the /x modifier
+is set, Perl allowed white space between ( and ? though the latest Perls give
+an error (for a while it was just deprecated). There may still be some cases
+where Perl behaves differently.
.P
15. Perl, when in warning mode, gives warnings for character classes such as
[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
@@ -199,6 +200,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 12 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 28 July 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2grep.1 b/dist2/doc/pcre2grep.1
index 6b3219bb..ce112af7 100644
--- a/dist2/doc/pcre2grep.1
+++ b/dist2/doc/pcre2grep.1
@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "24 November 2018" "PCRE2 10.33"
+.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -759,12 +759,10 @@ character. Otherwise \fBpcre2grep\fP will assume that it has no data.
.sp
\fBpcre2grep\fP has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of PCRE2's
-callout facility. However, this support can be completely or partially disabled
-when \fBpcre2grep\fP is built. You can find out whether your binary has support
-for callouts by running it with the \fB--help\fP option. If callout support is
-completely disabled, all callouts in patterns are ignored by \fBpcre2grep\fP.
-If the facility is partially disabled, calling external programs is not
-supported, and callouts that request it are ignored.
+callout facility. However, this support can be disabled when \fBpcre2grep\fP is
+built. You can find out whether your binary has support for callouts by running
+it with the \fB--help\fP option. If the support is not enabled, all callouts in
+patterns are ignored by \fBpcre2grep\fP.
.P
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
either a number or a quoted string (see the
@@ -778,11 +776,6 @@ only callouts with string arguments are useful.
.SS "Calling external programs or scripts"
.rs
.sp
-This facility can be independently disabled when \fBpcre2grep\fP is built. It
-is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
-where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
-\fBfork()\fP and \fBexecv()\fP are available.
-.P
If the callout string does not start with a pipe (vertical bar) character, it
is parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying
@@ -810,7 +803,7 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
Arg1: [1] [234] [4] Arg2: |1| ()
12345
.sp
-The parameters for the system call that is used to run the
+The parameters for the \fBexecv()\fP system call that is used to run the
program or script are zero-terminated strings. This means that binary zero
characters in the callout argument will cause premature termination of their
substrings, and therefore should not be present. Any syntax errors in the
@@ -823,15 +816,14 @@ matcher backtracks in the normal way.
.SS "Echoing a specific string"
.rs
.sp
-This facility is always available, provided that callouts were not completely
-disabled when \fBpcre2grep\fP was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
+If the callout string starts with a pipe (vertical bar) character, the rest of
+the string is written to the output, having been passed through the same escape
+processing as text from the --output option. This provides a simple echoing
+facility that avoids calling an external program or script. No terminator is
+added to the string, so if you want a newline, you must include it explicitly.
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+relevant pattern with (*FAIL).
.
.
.SH "MATCHING ERRORS"
@@ -884,6 +876,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 24 November 2018
+Last updated: 24 February 2018
Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2grep.txt b/dist2/doc/pcre2grep.txt
index cd44fe00..000239cd 100644
--- a/dist2/doc/pcre2grep.txt
+++ b/dist2/doc/pcre2grep.txt
@@ -832,26 +832,18 @@ USING PCRE2'S CALLOUT FACILITY
pcre2grep has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of
- PCRE2's callout facility. However, this support can be completely or
- partially disabled when pcre2grep is built. You can find out whether
- your binary has support for callouts by running it with the --help
- option. If callout support is completely disabled, all callouts in pat-
- terns are ignored by pcre2grep. If the facility is partially disabled,
- calling external programs is not supported, and callouts that request
- it are ignored.
-
- A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
- ment is either a number or a quoted string (see the pcre2callout docu-
- mentation for details). Numbered callouts are ignored by pcre2grep;
+ PCRE2's callout facility. However, this support can be disabled when
+ pcre2grep is built. You can find out whether your binary has support
+ for callouts by running it with the --help option. If the support is
+ not enabled, all callouts in patterns are ignored by pcre2grep.
+
+ A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
+ ment is either a number or a quoted string (see the pcre2callout docu-
+ mentation for details). Numbered callouts are ignored by pcre2grep;
only callouts with string arguments are useful.
Calling external programs or scripts
- This facility can be independently disabled when pcre2grep is built. It
- is supported for Windows, where a call to _spawnvp() is used, for VMS,
- where lib$spawn() is used, and for any other Unix-like environment
- where fork() and execv() are available.
-
If the callout string does not start with a pipe (vertical bar) charac-
ter, it is parsed into a list of substrings separated by pipe charac-
ters. The first substring must be an executable name, with the follow-
@@ -881,27 +873,27 @@ USING PCRE2'S CALLOUT FACILITY
Arg1: [1] [234] [4] Arg2: |1| ()
12345
- The parameters for the system call that is used to run the program or
- script are zero-terminated strings. This means that binary zero charac-
- ters in the callout argument will cause premature termination of their
- substrings, and therefore should not be present. Any syntax errors in
- the string (for example, a dollar not followed by another character)
- cause the callout to be ignored. If running the program fails for any
- reason (including the non-existence of the executable), a local match-
- ing failure occurs and the matcher backtracks in the normal way.
+ The parameters for the execv() system call that is used to run the pro-
+ gram or script are zero-terminated strings. This means that binary zero
+ characters in the callout argument will cause premature termination of
+ their substrings, and therefore should not be present. Any syntax
+ errors in the string (for example, a dollar not followed by another
+ character) cause the callout to be ignored. If running the program
+ fails for any reason (including the non-existence of the executable), a
+ local matching failure occurs and the matcher backtracks in the normal
+ way.
Echoing a specific string
- This facility is always available, provided that callouts were not com-
- pletely disabled when pcre2grep was built. If the callout string starts
- with a pipe (vertical bar) character, the rest of the string is written
- to the output, having been passed through the same escape processing as
- text from the --output option. This provides a simple echoing facility
- that avoids calling an external program or script. No terminator is
- added to the string, so if you want a newline, you must include it
- explicitly. Matching continues normally after the string is output. If
- you want to see only the callout output but not any output from an
- actual match, you should end the relevant pattern with (*FAIL).
+ If the callout string starts with a pipe (vertical bar) character, the
+ rest of the string is written to the output, having been passed through
+ the same escape processing as text from the --output option. This pro-
+ vides a simple echoing facility that avoids calling an external program
+ or script. No terminator is added to the string, so if you want a new-
+ line, you must include it explicitly. Matching continues normally
+ after the string is output. If you want to see only the callout output
+ but not any output from an actual match, you should end the relevant
+ pattern with (*FAIL).
MATCHING ERRORS
@@ -948,5 +940,5 @@ AUTHOR
REVISION
- Last updated: 24 November 2018
+ Last updated: 24 February 2018
Copyright (c) 1997-2018 University of Cambridge.
diff --git a/dist2/doc/pcre2jit.3 b/dist2/doc/pcre2jit.3
index b7ae2eb1..c3b916b9 100644
--- a/dist2/doc/pcre2jit.3
+++ b/dist2/doc/pcre2jit.3
@@ -1,4 +1,4 @@
-.TH PCRE2JIT 3 "06 March 2019" "PCRE2 10.33"
+.TH PCRE2JIT 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@@ -120,36 +120,13 @@ support is not available, or the pattern was not processed by
pattern.
.
.
-.SH "MATCHING SUBJECTS CONTAINING INVALID UTF"
-.rs
-.sp
-When a pattern is compiled with the PCRE2_UTF option, the interpretive matching
-function expects its subject string to be a valid sequence of UTF code units.
-If it is not, the result is undefined. This is also true by default of matching
-via JIT. However, if the option PCRE2_JIT_INVALID_UTF is passed to
-\fBpcre2_jit_compile()\fP, code that can process a subject containing invalid
-UTF is compiled.
-.P
-In this mode, an invalid code unit sequence never matches any pattern item. It
-does not match dot, it does not match \ep{Any}, it does not even match negative
-items such as [^X]. A lookbehind assertion fails if it encounters an invalid
-sequence while moving the current point backwards. In other words, an invalid
-UTF code unit sequence acts as a barrier which no match can cross. Reaching an
-invalid sequence causes an immediate backtrack.
-.P
-Using this option, an application can run matches in arbitrary data, knowing
-that any matched strings that are returned will be valid UTF. This can be
-useful when searching for text in executable or other binary files.
-.
-.
.SH "UNSUPPORTED OPTIONS AND PATTERN ITEMS"
.rs
.sp
The \fBpcre2_match()\fP options that are supported for JIT matching are
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
-PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and
-PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not
-supported at match time.
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
+PCRE2_ANCHORED option is not supported at match time.
.P
If the PCRE2_NO_JIT option is passed to \fBpcre2_match()\fP it disables the
use of JIT, forcing matching by the interpreter code.
@@ -399,13 +376,10 @@ available, and which need the best possible performance, can instead use a
processed by \fBpcre2_jit_compile()\fP).
.P
The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly
-the same arguments as \fBpcre2_match()\fP. However, the subject string must be
-specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported
-option bits (for example, PCRE2_ANCHORED, PCRE2_ENDANCHORED and
-PCRE2_COPY_MATCHED_SUBJECT) are ignored, as is the PCRE2_NO_JIT option. The
-return values are also the same as for \fBpcre2_match()\fP, plus
-PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
-that was not compiled.
+the same arguments as \fBpcre2_match()\fP. The return values are also the same,
+plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
+requested that was not compiled. Unsupported option bits (for example,
+PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT option.
.P
When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
number of other sanity checks are performed on the arguments. For example, if
@@ -438,6 +412,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 06 March 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 28 June 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2limits.3 b/dist2/doc/pcre2limits.3
index 9bf3626d..803e97b0 100644
--- a/dist2/doc/pcre2limits.3
+++ b/dist2/doc/pcre2limits.3
@@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2LIMITS 3 "30 March 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SIZE AND OTHER LIMITATIONS"
@@ -34,16 +34,16 @@ All values in repeating quantifiers must be less than 65536.
.P
The maximum length of a lookbehind assertion is 65535 characters.
.P
-There is no limit to the number of parenthesized groups, but there can be no
-more than 65535 capture groups, and there is a limit to the depth of nesting of
-parenthesized subpatterns of all kinds. This is imposed in order to limit the
-amount of system stack used at compile time. The default limit can be specified
-when PCRE2 is built; if not, the default is set to 250. An application can
-change this limit by calling pcre2_set_parens_nest_limit() to set the limit in
-a compile context.
+There is no limit to the number of parenthesized subpatterns, but there can be
+no more than 65535 capturing subpatterns. There is, however, a limit to the
+depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
+order to limit the amount of system stack used at compile time. The default
+limit can be specified when PCRE2 is built; if not, the default is set to 250.
+An application can change this limit by calling pcre2_set_parens_nest_limit()
+to set the limit in a compile context.
.P
-The maximum length of name for a named capture group is 32 code units, and the
-maximum number of such groups is 10000.
+The maximum length of name for a named subpattern is 32 code units, and the
+maximum number of named subpatterns is 10000.
.P
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
@@ -67,6 +67,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 02 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 30 March 2017
+Copyright (c) 1997-2017 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2matching.3 b/dist2/doc/pcre2matching.3
index 20ffac5b..81ce9683 100644
--- a/dist2/doc/pcre2matching.3
+++ b/dist2/doc/pcre2matching.3
@@ -1,4 +1,4 @@
-.TH PCRE2MATCHING 3 "10 October 2018" "PCRE2 10.33"
+.TH PCRE2MATCHING 3 "29 September 2014" "PCRE2 10.00"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 MATCHING ALGORITHMS"
@@ -113,8 +113,7 @@ do want multiple matches in such cases, either use an ungreedy repeat
("a\ed+?") or set the PCRE2_NO_AUTO_POSSESS option when compiling.
.P
There are a number of features of PCRE2 regular expressions that are not
-supported or behave differently in the alternative matching function. Those
-that are not supported cause an error if encountered.
+supported by the alternative matching algorithm. They are as follows:
.P
1. Because the algorithm finds all possible matches, the greedy or ungreedy
nature of repetition quantifiers is not relevant (though it may affect
@@ -136,26 +135,24 @@ possibilities, and PCRE2's implementation of this algorithm does not attempt to
do this. This means that no captured substrings are available.
.P
3. Because no substrings are captured, backreferences within the pattern are
-not supported.
+not supported, and cause errors if encountered.
.P
4. For the same reason, conditional expressions that use a backreference as the
condition or test for a specific group recursion are not supported.
.P
-5. Again for the same reason, script runs are not supported.
-.P
-6. Because many paths through the tree may be active, the \eK escape sequence,
+5. Because many paths through the tree may be active, the \eK escape sequence,
which resets the start of the match when encountered (but may be on some paths
-and not on others), is not supported.
+and not on others), is not supported. It causes an error if encountered.
.P
-7. Callouts are supported, but the value of the \fIcapture_top\fP field is
+6. Callouts are supported, but the value of the \fIcapture_top\fP field is
always 1, and the value of the \fIcapture_last\fP field is always 0.
.P
-8. The \eC escape sequence, which (in the standard algorithm) always matches a
+7. The \eC escape sequence, which (in the standard algorithm) always matches a
single code unit, even in a UTF mode, is not supported in these modes, because
the alternative algorithm moves through the subject string one character (not
code unit) at a time, for all active paths through the tree.
.P
-9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
+8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
.
.
@@ -191,7 +188,7 @@ The alternative algorithm suffers from a number of disadvantages:
because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
.P
-2. Capturing parentheses, backreferences, and script runs are not supported.
+2. Capturing parentheses and backreferences are not supported.
.P
3. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
@@ -211,6 +208,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 10 October 2018
-Copyright (c) 1997-2018 University of Cambridge.
+Last updated: 29 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2pattern.3 b/dist2/doc/pcre2pattern.3
index de8d7cef..0247c524 100644
--- a/dist2/doc/pcre2pattern.3
+++ b/dist2/doc/pcre2pattern.3
@@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "12 February 2019" "PCRE2 10.33"
+.TH PCRE2PATTERN 3 "04 September 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
@@ -20,13 +20,13 @@ copious examples. Jeffrey Friedl's "Mastering Regular Expressions", published
by O'Reilly, covers regular expressions in great detail. This description of
PCRE2's regular expressions is intended as reference material.
.P
-This document discusses the regular expression patterns that are supported by
-PCRE2 when its main matching function, \fBpcre2_match()\fP, is used. PCRE2 also
-has an alternative matching function, \fBpcre2_dfa_match()\fP, which matches
-using a different algorithm that is not Perl-compatible. Some of the features
-discussed below are not available when DFA matching is used. The advantages and
-disadvantages of the alternative function, and how it differs from the normal
-function, are discussed in the
+This document discusses the patterns that are supported by PCRE2 when its main
+matching function, \fBpcre2_match()\fP, is used. PCRE2 also has an alternative
+matching function, \fBpcre2_dfa_match()\fP, which matches using a different
+algorithm that is not Perl-compatible. Some of the features discussed below are
+not available when DFA matching is used. The advantages and disadvantages of
+the alternative function, and how it differs from the normal function, are
+discussed in the
.\" HREF
\fBpcre2matching\fP
.\"
@@ -149,8 +149,8 @@ this indirectly restricts the amount of heap memory that is used, but there is
also an explicit memory limit that can be set.
.P
These facilities are provided to catch runaway matches that are provoked by
-patterns with huge matching trees. A common example is a pattern with nested
-unlimited repeats applied to a long string that does not match. When one of
+patterns with huge matching trees (a typical example is a pattern with nested
+unlimited repeats applied to a long string that does not match). When one of
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
can also be set by items at the start of the pattern of the form
.sp
@@ -264,10 +264,10 @@ matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
independently of case.
.P
-The power of regular expressions comes from the ability to include wild cards,
-character classes, alternatives, and repetitions in the pattern. These are
-encoded in the pattern by the use of \fImetacharacters\fP, which do not stand
-for themselves but instead are interpreted in some special way.
+The power of regular expressions comes from the ability to include alternatives
+and repetitions in the pattern. These are encoded in the pattern by the use of
+\fImetacharacters\fP, which do not stand for themselves but instead are
+interpreted in some special way.
.P
There are two different sets of metacharacters: those that are recognized
anywhere in the pattern except within square brackets, and those that are
@@ -280,11 +280,14 @@ are as follows:
. match any character except newline (by default)
[ start character class definition
| start of alternative branch
- ( start group or control verb
- ) end group or control verb
+ ( start subpattern
+ ) end subpattern
+ ? extends the meaning of (
+ also 0 or 1 quantifier
+ also quantifier minimizer
* 0 or more quantifier
- + 1 or more quantifier; also "possessive quantifier"
- ? 0 or 1 quantifier; also quantifier minimizer
+ + 1 or more quantifier
+ also "possessive quantifier"
{ start min/max quantifier
.sp
Part of a pattern that is in square brackets is called a "character class". In
@@ -293,7 +296,9 @@ a character class the only metacharacters are:
\e general escape character
^ negate the class, but only if the first character
- indicates character range
- [ POSIX character class (if followed by POSIX syntax)
+.\" JOIN
+ [ POSIX character class (only if followed by POSIX
+ syntax)
] terminates the character class
.sp
The following sections describe the use of each of the metacharacters.
@@ -303,7 +308,7 @@ The following sections describe the use of each of the metacharacters.
.rs
.sp
The backslash character has several uses. Firstly, if it is followed by a
-character that is not a digit or a letter, it takes away any special meaning
+character that is not a number or a letter, it takes away any special meaning
that character may have. This use of backslash as an escape character applies
both inside and outside character classes.
.P
@@ -313,7 +318,7 @@ would otherwise be interpreted as a metacharacter, so it is always safe to
precede a non-alphanumeric with backslash to specify that it stands for itself.
In particular, if you want to match a backslash, you write \e\e.
.P
-In a UTF mode, only ASCII digits and letters have any special meaning after a
+In a UTF mode, only ASCII numbers and letters have any special meaning after a
backslash. All other characters (in particular, those whose code points are
greater than 127) are treated as literals.
.P
@@ -323,13 +328,13 @@ outside a character class and the next newline, inclusive, are ignored. An
escaping backslash can be used to include a white space or # character as part
of the pattern.
.P
-If you want to treat all characters in a sequence as literals, you can do so by
-putting them between \eQ and \eE. This is different from Perl in that $ and @
-are handled as literals in \eQ...\eE sequences in PCRE2, whereas in Perl, $ and
-@ cause variable interpolation. Also, Perl does "double-quotish backslash
-interpolation" on any backslashes between \eQ and \eE which, its documentation
-says, "may lead to confusing results". PCRE2 treats a backslash between \eQ and
-\eE just like any other character. Note the following examples:
+If you want to remove the special meaning from a sequence of characters, you
+can do so by putting them between \eQ and \eE. This is different from Perl in
+that $ and @ are handled as literals in \eQ...\eE sequences in PCRE2, whereas
+in Perl, $ and @ cause variable interpolation. Also, Perl does "double-quotish
+backslash interpolation" on any backslashes between \eQ and \eE which, its
+documentation says, "may lead to confusing results". PCRE2 treats a backslash
+between \eQ and \eE just like any other character. Note the following examples:
.sp
Pattern PCRE2 matches Perl matches
.sp
@@ -357,15 +362,15 @@ A second use of backslash provides a way of encoding non-printing characters
in patterns in a visible manner. There is no restriction on the appearance of
non-printing characters in a pattern, but when a pattern is being prepared by
text editing, it is often easier to use one of the following escape sequences
-instead of the binary character it represents. In an ASCII or Unicode
-environment, these escapes are as follows:
+than the binary character it represents. In an ASCII or Unicode environment,
+these escapes are as follows:
.sp
\ea alarm, that is, the BEL character (hex 07)
\ecx "control-x", where x is any printable ASCII character
\ee escape (hex 1B)
\ef form feed (hex 0C)
\en linefeed (hex 0A)
- \er carriage return (hex 0D) (but see below)
+ \er carriage return (hex 0D)
\et tab (hex 09)
\e0dd character with octal code 0dd
\eddd character with octal code ddd, or backreference
@@ -373,42 +378,14 @@ environment, these escapes are as follows:
\exhh character with hex code hh
\ex{hhh..} character with hex code hhh..
\eN{U+hhh..} character with Unicode hex code point hhh..
+ \euhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
.sp
-By default, after \ex that is not followed by {, from zero to two hexadecimal
-digits are read (letters can be in upper or lower case). Any number of
-hexadecimal digits may appear between \ex{ and }. If a character other than a
-hexadecimal digit appears between \ex{ and }, or if there is no terminating },
-an error occurs.
-.P
-Characters whose code points are less than 256 can be defined by either of the
-two syntaxes for \ex or by an octal sequence. There is no difference in the way
-they are handled. For example, \exdc is exactly the same as \ex{dc} or \e334.
-However, using the braced versions does make such sequences easier to read.
-.P
-Support is available for some ECMAScript (aka JavaScript) escape sequences via
-two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \ex followed
-by { is not recognized. Only if \ex is followed by two hexadecimal digits is it
-recognized as a character escape. Otherwise it is interpreted as a literal "x"
-character. In this mode, support for code points greater than 255 is provided
-by \eu, which must be followed by four hexadecimal digits; otherwise it is
-interpreted as a literal "u" character.
-.P
-PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
-\eu{hhh..} is recognized as the character specified by hexadecimal code point.
-There may be any number of hexadecimal digits. This syntax is from ECMAScript
-6.
-.P
The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
is set, that is, when PCRE2 is operating in a Unicode mode. Perl also uses
\eN{name} to specify characters by Unicode name; PCRE2 does not support this.
Note that when \eN is not followed by an opening brace (curly bracket) it has
an entirely different meaning, matching any character that is not a newline.
.P
-There are some legacy applications where the escape sequence \er is expected to
-match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option is set, \er in a
-pattern is converted to \en so that it matches a LF (linefeed) instead of a CR
-(carriage return) character.
-.P
The precise effect of \ecx on ASCII characters is as follows: if x is a lower
case letter, it is converted to upper case. Then bit 6 of the character (hex
40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is 5A),
@@ -459,17 +436,17 @@ and Perl has changed over time, causing PCRE2 also to change.
.P
Outside a character class, PCRE2 reads the digit and any following digits as a
decimal number. If the number is less than 10, begins with the digit 8 or 9, or
-if there are at least that many previous capture groups in the expression, the
-entire sequence is taken as a \fIbackreference\fP. A description of how this
-works is given
+if there are at least that many previous capturing left parentheses in the
+expression, the entire sequence is taken as a \fIbackreference\fP. A
+description of how this works is given
.\" HTML <a href="#backreferences">
.\" </a>
later,
.\"
following the discussion of
-.\" HTML <a href="#group">
+.\" HTML <a href="#subpattern">
.\" </a>
-parenthesized groups.
+parenthesized subpatterns.
.\"
Otherwise, up to three octal digits are read to form a character code.
.P
@@ -481,7 +458,7 @@ for themselves. For example, outside a character class:
\e040 is another way of writing an ASCII space
.\" JOIN
\e40 is the same, provided there are fewer than 40
- previous capture groups
+ previous capturing subpatterns
\e7 is always a backreference
.\" JOIN
\e11 might be a backreference, or another way of
@@ -500,6 +477,23 @@ for themselves. For example, outside a character class:
Note that octal values of 100 or greater that are specified using this syntax
must not be introduced by a leading zero, because no more than three octal
digits are ever read.
+.P
+By default, after \ex that is not followed by {, from zero to two hexadecimal
+digits are read (letters can be in upper or lower case). Any number of
+hexadecimal digits may appear between \ex{ and }. If a character other than
+a hexadecimal digit appears between \ex{ and }, or if there is no terminating
+}, an error occurs.
+.P
+If the PCRE2_ALT_BSUX option is set, the interpretation of \ex is as just
+described only when it is followed by two hexadecimal digits. Otherwise, it
+matches a literal "x" character. In this mode, support for code points greater
+than 255 is provided by \eu, which must be followed by four hexadecimal digits;
+otherwise it matches a literal "u" character.
+.P
+Characters whose value is less than 256 can be defined by either of the two
+syntaxes for \ex (or by \eu in PCRE2_ALT_BSUX mode). There is no difference in
+the way they are handled. For example, \exdc is exactly the same as \ex{dc} (or
+\eu00dc in PCRE2_ALT_BSUX mode).
.
.
.SS "Constraints on character values"
@@ -538,10 +532,9 @@ character class, these sequences have different meanings.
.sp
In Perl, the sequences \eF, \el, \eL, \eu, and \eU are recognized by its string
handler and used to modify the case of following characters. By default, PCRE2
-does not support these escape sequences in patterns. However, if either of the
-PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \eU matches a "U"
-character, and \eu can be used to define a character by code point, as
-described above.
+does not support these escape sequences. However, if the PCRE2_ALT_BSUX option
+is set, \eU matches a "U" character, and \eu can be used to define a character
+by code point, as described above.
.
.
.SS "Absolute and relative backreferences"
@@ -555,9 +548,9 @@ can be coded as \eg{name}. Backreferences are discussed
later,
.\"
following the discussion of
-.\" HTML <a href="#group">
+.\" HTML <a href="#subpattern">
.\" </a>
-parenthesized groups.
+parenthesized subpatterns.
.\"
.
.
@@ -566,14 +559,14 @@ parenthesized groups.
.sp
For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or
a number enclosed either in angle brackets or single quotes is an alternative
-syntax for referencing a capture group as a subroutine. Details are discussed
+syntax for referencing a subpattern as a "subroutine". Details are discussed
.\" HTML <a href="#onigurumasubroutines">
.\" </a>
later.
.\"
Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
synonymous. The former is a backreference; the latter is a
-.\" HTML <a href="#groupsassubroutines">
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutine
.\"
@@ -753,22 +746,21 @@ an error.
.rs
.sp
When PCRE2 is built with Unicode support (the default), three additional escape
-sequences that match characters with specific properties are available. They
-can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
-sequences are of course limited to testing characters whose code points are
-less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
-greater than 0x10ffff (the Unicode limit) may be encountered. These are all
-treated as being in the Unknown script and with an unassigned type. The extra
-escape sequences are:
+sequences that match characters with specific properties are available. In
+8-bit non-UTF-8 mode, these sequences are of course limited to testing
+characters whose code points are less than 256, but they do work in this mode.
+In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit)
+may be encountered. These are all treated as being in the Common script and
+with an unassigned type. The extra escape sequences are:
.sp
\ep{\fIxx\fP} a character with the \fIxx\fP property
\eP{\fIxx\fP} a character without the \fIxx\fP property
\eX a Unicode extended grapheme cluster
.sp
-The property names represented by \fIxx\fP above are case-sensitive. There is
-support for Unicode script names, Unicode general category properties, "Any",
-which matches any character (including newline), and some special PCRE2
-properties (described in the
+The property names represented by \fIxx\fP above are limited to the Unicode
+script names, the general category properties, "Any", which matches any
+character (including newline), and some special PCRE2 properties (described
+in the
.\" HTML <a href="#extraprops">
.\" </a>
next section).
@@ -784,10 +776,8 @@ example:
\ep{Greek}
\eP{Han}
.sp
-Unassigned characters (and in non-UTF 32-bit mode, characters with code points
-greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
-part of an identified script are lumped together as "Common". The current list
-of scripts is:
+Those that are not part of an identified script are lumped together as
+"Common". The current list of scripts is:
.P
Adlam,
Ahom,
@@ -933,7 +923,6 @@ Tibetan,
Tifinagh,
Tirhuta,
Ugaritic,
-Unknown,
Vai,
Warang_Citi,
Yi,
@@ -1002,16 +991,14 @@ The special property L& is also supported: it matches a character that has
the Lu, Ll, or Lt property, in other words, a letter that is not classified as
a modifier or "other".
.P
-The Cs (Surrogate) property applies only to characters whose code points are in
-the range U+D800 to U+DFFF. These characters are no different to any other
-character when PCRE2 is not in UTF mode (using the 16-bit or 32-bit library).
-However, they are not valid in Unicode strings and so cannot be tested by PCRE2
-in UTF mode, unless UTF validity checking has been turned off (see the
-discussion of PCRE2_NO_UTF_CHECK in the
+The Cs (Surrogate) property applies only to characters in the range U+D800 to
+U+DFFF. Such characters are not valid in Unicode strings and so
+cannot be tested by PCRE2, unless UTF validity checking has been turned off
+(see the discussion of PCRE2_NO_UTF_CHECK in the
.\" HREF
\fBpcre2api\fP
.\"
-page).
+page). Perl does not support the Cs property.
.P
The long synonyms for property names that Perl supports (such as \ep{Letter})
are not supported by PCRE2, nor is it permitted to prefix any of these
@@ -1135,7 +1122,7 @@ a lookbehind assertion
However, in this case, the part of the subject before the real match does not
have to be of fixed length, as lookbehind assertions do. The use of \eK does
not interfere with the setting of
-.\" HTML <a href="#group">
+.\" HTML <a href="#subpattern">
.\" </a>
captured substrings.
.\"
@@ -1166,7 +1153,7 @@ start of the reported match is earlier than where the match started.
The final use of backslash is for certain simple assertions. An assertion
specifies a condition that has to be met at a particular point in a match,
without consuming any characters from the subject string. The use of
-groups for more complicated assertions is described
+subpatterns for more complicated assertions is described
.\" HTML <a href="#bigassertions">
.\" </a>
below.
@@ -1188,12 +1175,12 @@ character. If any other of these assertions appears in a character class, an
A word boundary is a position in the subject string where the current character
and the previous character do not both match \ew or \eW (i.e. one matches
\ew and the other matches \eW), or the start or end of the string if the
-first or last character matches \ew, respectively. When PCRE2 is built with
-Unicode support, the meanings of \ew and \eW can be changed by setting the
-PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2
-nor Perl has a separate "start of word" or "end of word" metasequence. However,
-whatever follows \eb normally determines which it is. For example, the fragment
-\eba matches "a" at the start of a word.
+first or last character matches \ew, respectively. In a UTF mode, the meanings
+of \ew and \eW can be changed by setting the PCRE2_UCP option. When this is
+done, it also affects \eb and \eB. Neither PCRE2 nor Perl has a separate "start
+of word" or "end of word" metasequence. However, whatever follows \eb normally
+determines which it is. For example, the fragment \eba matches "a" at the start
+of a word.
.P
The \eA, \eZ, and \ez assertions differ from the traditional circumflex and
dollar (described in the next section) in that they only ever match at the very
@@ -1385,9 +1372,9 @@ could be used with a UTF-8 string (ignore white space and line breaks):
.sp
In this example, a group that starts with (?| resets the capturing parentheses
numbers in each alternative (see
-.\" HTML <a href="#dupgroupnumber">
+.\" HTML <a href="#dupsubpatternnumber">
.\" </a>
-"Duplicate Group Numbers"
+"Duplicate Subpattern Numbers"
.\"
below). The assertions at the start of each branch check the next UTF-8
character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The
@@ -1629,13 +1616,13 @@ the pattern
matches either "gilbert" or "sullivan". Any number of alternatives may appear,
and an empty alternative is permitted (matching the empty string). The matching
process tries each alternative in turn, from left to right, and the first one
-that succeeds is used. If the alternatives are within a group
-.\" HTML <a href="#group">
+that succeeds is used. If the alternatives are within a subpattern
+.\" HTML <a href="#subpattern">
.\" </a>
(defined below),
.\"
"succeeds" means matching the rest of the main pattern as well as the
-alternative in the group.
+alternative in the subpattern.
.
.
.SH "INTERNAL OPTION SETTING"
@@ -1678,16 +1665,16 @@ the same way as the Perl-compatible options by using the characters J and U
respectively. However, these are not unset by (?^).
.P
When one of these option changes occurs at top level (that is, not inside
-group parentheses), the change applies to the remainder of the pattern
-that follows. An option change within a group (see below for a description
-of groups) affects only that part of the group that follows it, so
+subpattern parentheses), the change applies to the remainder of the pattern
+that follows. An option change within a subpattern (see below for a description
+of subpatterns) affects only that part of the subpattern that follows it, so
.sp
(a(?i)b)c
.sp
matches abc and aBc and no other strings (assuming PCRE2_CASELESS is not used).
By this means, options can be made to have different settings in different
parts of the pattern. Any changes made in one alternative do carry on
-into subsequent branches within the same group. For example,
+into subsequent branches within the same subpattern. For example,
.sp
(a(?i)b|c)
.sp
@@ -1697,7 +1684,7 @@ option settings happen at compile time. There would be some very weird
behaviour otherwise.
.P
As a convenient shorthand, if any option settings are required at the start of
-a non-capturing group (see the next section), the option letters may
+a non-capturing subpattern (see the next section), the option letters may
appear between the "?" and the ":". Thus the two patterns
.sp
(?i:saturday|sunday)
@@ -1705,11 +1692,10 @@ appear between the "?" and the ":". Thus the two patterns
.sp
match exactly the same set of strings.
.P
-\fBNote:\fP There are other PCRE2-specific options, applying to the whole
-pattern, which can be set by the application when the compiling function is
-called. In addition, the pattern can contain special leading sequences such as
-(*CRLF) to override what the application has set or what has been defaulted.
-Details are given in the section entitled
+\fBNote:\fP There are other PCRE2-specific options that can be set by the
+application when the compiling function is called. The pattern can contain
+special leading sequences such as (*CRLF) to override what the application has
+set or what has been defaulted. Details are given in the section entitled
.\" HTML <a href="#newlineseq">
.\" </a>
"Newline sequences"
@@ -1721,12 +1707,12 @@ the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP options, which lock out the use of the
(*UTF) and (*UCP) sequences.
.
.
-.\" HTML <a name="group"></a>
-.SH GROUPS
+.\" HTML <a name="subpattern"></a>
+.SH SUBPATTERNS
.rs
.sp
-Groups are delimited by parentheses (round brackets), which can be nested.
-Turning part of a pattern into a group does two things:
+Subpatterns are delimited by parentheses (round brackets), which can be nested.
+Turning part of a pattern into a subpattern does two things:
.sp
1. It localizes a set of alternatives. For example, the pattern
.sp
@@ -1735,15 +1721,15 @@ Turning part of a pattern into a group does two things:
matches "cataract", "caterpillar", or "cat". Without the parentheses, it would
match "cataract", "erpillar" or an empty string.
.sp
-2. It creates a "capture group". This means that, when the whole pattern
-matches, the portion of the subject string that matched the group is passed
-back to the caller, separately from the portion that matched the whole pattern.
-(This applies only to the traditional matching function; the DFA matching
-function does not support capturing.)
+2. It sets up the subpattern as a capturing subpattern. This means that, when
+the whole pattern matches, the portion of the subject string that matched the
+subpattern is passed back to the caller, separately from the portion that
+matched the whole pattern. (This applies only to the traditional matching
+function; the DFA matching function does not support capturing.)
.P
Opening parentheses are counted from left to right (starting from 1) to obtain
-numbers for capture groups. For example, if the string "the red king" is
-matched against the pattern
+numbers for the capturing subpatterns. For example, if the string "the red
+king" is matched against the pattern
.sp
the ((red|white) (king|queen))
.sp
@@ -1751,37 +1737,38 @@ the captured substrings are "red king", "red", and "king", and are numbered 1,
2, and 3, respectively.
.P
The fact that plain parentheses fulfil two functions is not always helpful.
-There are often times when grouping is required without capturing. If an
-opening parenthesis is followed by a question mark and a colon, the group
-does not do any capturing, and is not counted when computing the number of any
-subsequent capture groups. For example, if the string "the white queen"
-is matched against the pattern
+There are often times when a grouping subpattern is required without a
+capturing requirement. If an opening parenthesis is followed by a question mark
+and a colon, the subpattern does not do any capturing, and is not counted when
+computing the number of any subsequent capturing subpatterns. For example, if
+the string "the white queen" is matched against the pattern
.sp
the ((?:red|white) (king|queen))
.sp
the captured substrings are "white queen" and "queen", and are numbered 1 and
-2. The maximum number of capture groups is 65535.
+2. The maximum number of capturing subpatterns is 65535.
.P
As a convenient shorthand, if any option settings are required at the start of
-a non-capturing group, the option letters may appear between the "?" and the
-":". Thus the two patterns
+a non-capturing subpattern, the option letters may appear between the "?" and
+the ":". Thus the two patterns
.sp
(?i:saturday|sunday)
(?:(?i)saturday|sunday)
.sp
match exactly the same set of strings. Because alternative branches are tried
-from left to right, and options are not reset until the end of the group is
-reached, an option setting in one branch does affect subsequent branches, so
+from left to right, and options are not reset until the end of the subpattern
+is reached, an option setting in one branch does affect subsequent branches, so
the above patterns match "SUNDAY" as well as "Saturday".
.
.
-.\" HTML <a name="dupgroupnumber"></a>
-.SH "DUPLICATE GROUP NUMBERS"
+.\" HTML <a name="dupsubpatternnumber"></a>
+.SH "DUPLICATE SUBPATTERN NUMBERS"
.rs
.sp
-Perl 5.10 introduced a feature whereby each alternative in a group uses the
-same numbers for its capturing parentheses. Such a group starts with (?| and is
-itself a non-capturing group. For example, consider this pattern:
+Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
+the same numbers for its capturing parentheses. Such a subpattern starts with
+(?| and is itself a non-capturing subpattern. For example, consider this
+pattern:
.sp
(?|(Sat)ur|(Sun))day
.sp
@@ -1791,7 +1778,7 @@ at captured substring number one, whichever alternative matched. This construct
is useful when you want to capture part, but not all, of one of a number of
alternatives. Inside a (?| group, parentheses are numbered as usual, but the
number is reset at the start of each branch. The numbers of any capturing
-parentheses that follow the whole group start after the highest number used in
+parentheses that follow the subpattern start after the highest number used in
any branch. The following example is taken from the Perl documentation. The
numbers underneath show in which buffer the captured content will be stored.
.sp
@@ -1799,12 +1786,13 @@ numbers underneath show in which buffer the captured content will be stored.
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
.sp
-A backreference to a capture group uses the most recent value that is set for
-the group. The following pattern matches "abcabc" or "defdef":
+A backreference to a numbered subpattern uses the most recent value that is
+set for that number by any subpattern. The following pattern matches "abcabc"
+or "defdef":
.sp
/(?|(abc)|(def))\e1/
.sp
-In contrast, a subroutine call to a capture group always refers to the
+In contrast, a subroutine call to a numbered subpattern always refers to the
first one in the pattern with the given number. The following pattern matches
"abcabc" or "defabc":
.sp
@@ -1818,35 +1806,29 @@ If a
.\" </a>
condition test
.\"
-for a group's having matched refers to a non-unique number, the test is
-true if any group with that number has matched.
+for a subpattern's having matched refers to a non-unique number, the test is
+true if any of the subpatterns of that number have matched.
.P
An alternative approach to using this "branch reset" feature is to use
-duplicate named groups, as described in the next section.
+duplicate named subpatterns, as described in the next section.
.
.
-.SH "NAMED CAPTURE GROUPS"
+.SH "NAMED SUBPATTERNS"
.rs
.sp
-Identifying capture groups by number is simple, but it can be very hard to keep
-track of the numbers in complicated patterns. Furthermore, if an expression is
-modified, the numbers may change. To help with this difficulty, PCRE2 supports
-the naming of capture groups. This feature was not added to Perl until release
-5.10. Python had the feature earlier, and PCRE1 introduced it at release 4.0,
-using the Python syntax. PCRE2 supports both the Perl and the Python syntax.
+Identifying capturing parentheses by number is simple, but it can be very hard
+to keep track of the numbers in complicated patterns. Furthermore, if an
+expression is modified, the numbers may change. To help with this difficulty,
+PCRE2 supports the naming of capturing subpatterns. This feature was not added
+to Perl until release 5.10. Python had the feature earlier, and PCRE1
+introduced it at release 4.0, using the Python syntax. PCRE2 supports both the
+Perl and the Python syntax.
.P
-In PCRE2, a capture group can be named in one of three ways: (?<name>...) or
-(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 32
-code units long. When PCRE2_UTF is not set, they may contain only ASCII
-alphanumeric characters and underscores, but must start with a non-digit. When
-PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode
-letter or Unicode decimal digit. In other words, group names must match one of
-these patterns:
-.sp
- ^[_A-Za-z][_A-Za-z0-9]*\ez when PCRE2_UTF is not set
- ^[_\ep{L}][_\ep{L}\ep{Nd}]*\ez when PCRE2_UTF is set
-.sp
-References to capture groups from other parts of the pattern, such as
+In PCRE2, a capturing subpattern can be named in one of three ways:
+(?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python. Names
+consist of up to 32 alphanumeric characters and underscores, but must start
+with a non-digit. References to capturing parentheses from other parts of the
+pattern, such as
.\" HTML <a href="#backreferences">
.\" </a>
backreferences,
@@ -1862,17 +1844,17 @@ conditions,
.\"
can all be made by name as well as by number.
.P
-Named capture groups are allocated numbers as well as names, exactly as
-if the names were not present. In both PCRE2 and Perl, capture groups
+Named capturing parentheses are allocated numbers as well as names, exactly as
+if the names were not present. In both PCRE2 and Perl, capturing subpatterns
are primarily identified by numbers; any names are just aliases for these
numbers. The PCRE2 API provides function calls for extracting the complete
name-to-number translation table from a compiled pattern, as well as
convenience functions for extracting captured substrings by name.
.P
-\fBWarning:\fP When more than one capture group has the same number, as
-described in the previous section, a name given to one of them applies to all
-of them. Perl allows identically numbered groups to have different names.
-Consider this pattern, where there are two capture groups, both numbered 1:
+\fBWarning:\fP When more than one subpattern has the same number, as described
+in the previous section, a name given to one of them applies to all of them.
+Perl allows identically numbered subpatterns to have different names. Consider
+this pattern, where there are two capturing subpatterns, both numbered 1:
.sp
(?|(?<AA>aa)|(?<BB>bb))
.sp
@@ -1886,20 +1868,20 @@ pattern:
.sp
(?|(?<AA>aa)|(bb))
.sp
-Although the second group number 1 is not explicitly named, the name AA is
-still an alias for any group 1. Whether the pattern matches "aa" or "bb", a
+Although the second subpattern number 1 is not explicitly named, the name AA is
+still an alias for subpattern 1. Whether the pattern matches "aa" or "bb", a
reference by name to group AA yields the matched string.
.P
By default, a name must be unique within a pattern, except that duplicate names
-are permitted for groups with the same number, for example:
+are permitted for subpatterns with the same number, for example:
.sp
(?|(?<AA>aa)|(?<AA>bb))
.sp
The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
option at compile time, or by the use of (?J) within the pattern. Duplicate
-names can be useful for patterns where only one instance of the named capture
-group can match. Suppose you want to match the name of a weekday, either as a
-3-letter abbreviation or as the full name, and in both cases you want to
+names can be useful for patterns where only one instance of the named
+parentheses can match. Suppose you want to match the name of a weekday, either
+as a 3-letter abbreviation or as the full name, and in both cases you want to
extract the abbreviation. This pattern (ignoring the line breaks) does the job:
.sp
(?<DN>Mon|Fri|Sun)(?:day)?|
@@ -1908,23 +1890,23 @@ extract the abbreviation. This pattern (ignoring the line breaks) does the job:
(?<DN>Thu)(?:rsday)?|
(?<DN>Sat)(?:urday)?
.sp
-There are five capture groups, but only one is ever set after a match. The
-convenience functions for extracting the data by name return the substring for
-the first (and in this example, the only) group of that name that matched. This
-saves searching to find which numbered group it was. (An alternative way of
-solving this problem is to use a "branch reset" group, as described in the
-previous section.)
+There are five capturing subpatterns, but only one is ever set after a match.
+The convenience functions for extracting the data by name return the substring
+for the first (and in this example, the only) subpattern of that name that
+matched. This saves searching to find which numbered subpattern it was. (An
+alternative way of solving this problem is to use a "branch reset" subpattern,
+as described in the previous section.)
.P
-If you make a backreference to a non-unique named group from elsewhere in the
-pattern, the groups to which the name refers are checked in the order in which
-they appear in the overall pattern. The first one that is set is used for the
-reference. For example, this pattern matches both "foofoo" and "barbar" but not
-"foobar" or "barfoo":
+If you make a backreference to a non-unique named subpattern from elsewhere in
+the pattern, the subpatterns to which the name refers are checked in the order
+in which they appear in the overall pattern. The first one that is set is used
+for the reference. For example, this pattern matches both "foofoo" and
+"barbar" but not "foobar" or "barfoo":
.sp
(?:(?<n>foo)|(?<n>bar))\ek<n>
.sp
.P
-If you make a subroutine call to a non-unique named group, the one that
+If you make a subroutine call to a non-unique named subpattern, the one that
corresponds to the first occurrence of the name is used. In the absence of
duplicate numbers this is the one with the lowest number.
.P
@@ -1935,11 +1917,11 @@ test (see the
.\" </a>
section about conditions
.\"
-below), either to check whether a capture group has matched, or to check for
-recursion, all groups with the same name are tested. If the condition is true
-for any one of them, the overall condition is true. This is the same behaviour
-as testing by number. For further details of the interfaces for handling named
-capture groups, see the
+below), either to check whether a subpattern has matched, or to check for
+recursion, all subpatterns with the same name are tested. If the condition is
+true for any one of them, the overall condition is true. This is the same
+behaviour as testing by number. For further details of the interfaces for
+handling named subpatterns, see the
.\" HREF
\fBpcre2api\fP
.\"
@@ -1955,18 +1937,18 @@ items:
a literal data character
the dot metacharacter
the \eC escape sequence
- the \eR escape sequence
the \eX escape sequence
+ the \eR escape sequence
an escape such as \ed or \epL that matches a single character
a character class
a backreference
- a parenthesized group (including most assertions)
- a subroutine call (recursive or otherwise)
+ a parenthesized subpattern (including most assertions)
+ a subroutine call to a subpattern (recursive or otherwise)
.sp
The general repetition quantifier specifies a minimum and maximum number of
permitted matches, by giving the two numbers in curly brackets (braces),
separated by a comma. The numbers must be less than 65536, and the first must
-be less than or equal to the second. For example,
+be less than or equal to the second. For example:
.sp
z{2,4}
.sp
@@ -1994,18 +1976,18 @@ several code units long (and they may be of different lengths).
.P
The quantifier {0} is permitted, causing the expression to behave as if the
previous item and the quantifier were not present. This may be useful for
-capture groups that are referenced as
-.\" HTML <a href="#groupsassubroutines">
+subpatterns that are referenced as
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutines
.\"
from elsewhere in the pattern (but see also the section entitled
.\" HTML <a href="#subdefine">
.\" </a>
-"Defining capture groups for use by reference only"
+"Defining subpatterns for use by reference only"
.\"
-below). Except for parenthesized groups, items that have a {0} quantifier are
-omitted from the compiled pattern.
+below). Items other than subpatterns that have a {0} quantifier are omitted
+from the compiled pattern.
.P
For convenience, the three most common quantifiers have single-character
abbreviations:
@@ -2014,22 +1996,22 @@ abbreviations:
+ is equivalent to {1,}
? is equivalent to {0,1}
.sp
-It is possible to construct infinite loops by following a group that can match
-no characters with a quantifier that has no upper limit, for example:
+It is possible to construct infinite loops by following a subpattern that can
+match no characters with a quantifier that has no upper limit, for example:
.sp
(a?)*
.sp
Earlier versions of Perl and PCRE1 used to give an error at compile time for
such patterns. However, because there are cases where this can be useful, such
-patterns are now accepted, but if any repetition of the group does in fact
+patterns are now accepted, but if any repetition of the subpattern does in fact
match no characters, the loop is forcibly broken.
.P
-By default, quantifiers are "greedy", that is, they match as much as possible
-(up to the maximum number of permitted times), without causing the rest of the
-pattern to fail. The classic example of where this gives problems is in trying
-to match comments in C programs. These appear between /* and */ and within the
-comment, individual * and / characters may appear. An attempt to match C
-comments by applying the pattern
+By default, the quantifiers are "greedy", that is, they match as much as
+possible (up to the maximum number of permitted times), without causing the
+rest of the pattern to fail. The classic example of where this gives problems
+is in trying to match comments in C programs. These appear between /* and */
+and within the comment, individual * and / characters may appear. An attempt to
+match C comments by applying the pattern
.sp
/\e*.*\e*/
.sp
@@ -2038,9 +2020,10 @@ to the string
/* first comment */ not comment /* second comment */
.sp
fails, because it matches the entire string owing to the greediness of the .*
-item. However, if a quantifier is followed by a question mark, it ceases to be
-greedy, and instead matches the minimum number of times possible, so the
-pattern
+item.
+.P
+If a quantifier is followed by a question mark, it ceases to be greedy, and
+instead matches the minimum number of times possible, so the pattern
.sp
/\e*.*?\e*/
.sp
@@ -2059,7 +2042,7 @@ the quantifiers are not greedy by default, but individual ones can be made
greedy by following them with a question mark. In other words, it inverts the
default behaviour.
.P
-When a parenthesized group is quantified with a minimum repeat count that
+When a parenthesized subpattern is quantified with a minimum repeat count that
is greater than 1 or with a limited maximum, more memory is required for the
compiled pattern, in proportion to the size of the minimum or maximum.
.P
@@ -2094,14 +2077,15 @@ It matches "ab" in the subject "aab". The use of the backtracking control verbs
(*PRUNE) and (*SKIP) also disable this optimization, and there is an option,
PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly.
.P
-When a capture group is repeated, the value captured is the substring that
-matched the final iteration. For example, after
+When a capturing subpattern is repeated, the value captured is the substring
+that matched the final iteration. For example, after
.sp
(tweedle[dume]{3}\es*)+
.sp
has matched "tweedledum tweedledee" the value of the captured substring is
-"tweedledee". However, if there are nested capture groups, the corresponding
-captured values may have been set in previous iterations. For example, after
+"tweedledee". However, if there are nested capturing subpatterns, the
+corresponding captured values may have been set in previous iterations. For
+example, after
.sp
(a|(b))+
.sp
@@ -2127,7 +2111,7 @@ After matching all 6 digits and then failing to match "foo", the normal
action of the matcher is to try again with only 5 digits matching the \ed+
item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
(a term taken from Jeffrey Friedl's book) provides the means for specifying
-that once a group has matched, it is not to be re-evaluated in this way.
+that once a subpattern has matched, it is not to be re-evaluated in this way.
.P
If we use atomic grouping for the previous example, the matcher gives up
immediately on failing to match "foo" the first time. The notation is a kind of
@@ -2135,28 +2119,23 @@ special parenthesis, starting with (?> as in this example:
.sp
(?>\ed+)foo
.sp
-Perl 5.28 introduced an experimental alphabetic form starting with (* which may
-be easier to remember:
-.sp
- (*atomic:\ed+)foo
-.sp
-This kind of parenthesized group "locks up" the part of the pattern it
-contains once it has matched, and a failure further into the pattern is
-prevented from backtracking into it. Backtracking past it to previous items,
-however, works as normal.
+This kind of parenthesis "locks up" the part of the pattern it contains once
+it has matched, and a failure further into the pattern is prevented from
+backtracking into it. Backtracking past it to previous items, however, works as
+normal.
.P
-An alternative description is that a group of this type matches exactly the
-string of characters that an identical standalone pattern would match, if
+An alternative description is that a subpattern of this type matches exactly
+the string of characters that an identical standalone pattern would match, if
anchored at the current point in the subject string.
.P
-Atomic groups are not capture groups. Simple cases such as the above example
-can be thought of as a maximizing repeat that must swallow everything it can.
-So, while both \ed+ and \ed+? are prepared to adjust the number of digits they
-match in order to make the rest of the pattern match, (?>\ed+) can only match
-an entire sequence of digits.
+Atomic grouping subpatterns are not capturing subpatterns. Simple cases such as
+the above example can be thought of as a maximizing repeat that must swallow
+everything it can. So, while both \ed+ and \ed+? are prepared to adjust the
+number of digits they match in order to make the rest of the pattern match,
+(?>\ed+) can only match an entire sequence of digits.
.P
Atomic groups in general can of course contain arbitrarily complicated
-expressions, and can be nested. However, when the contents of an atomic
+subpatterns, and can be nested. However, when the subpattern for an atomic
group is just a single repeated item, as in the example above, a simpler
notation, called a "possessive quantifier" can be used. This consists of an
additional + character following a quantifier. Using this notation, the
@@ -2178,8 +2157,8 @@ difference; possessive quantifiers should be slightly faster.
The possessive quantifier syntax is an extension to the Perl 5.8 syntax.
Jeffrey Friedl originated the idea (and the name) in the first edition of his
book. Mike McCloskey liked it, so implemented it when he built Sun's Java
-package, and PCRE1 copied it from there. It found its way into Perl at release
-5.10.
+package, and PCRE1 copied it from there. It ultimately found its way into Perl
+at release 5.10.
.P
PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
@@ -2187,9 +2166,10 @@ there is no point in backtracking into a sequence of A's when B must follow.
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS).
.P
-When a pattern contains an unlimited repeat inside a group that can itself be
-repeated an unlimited number of times, the use of an atomic group is the only
-way to avoid some failing matches taking a very long time indeed. The pattern
+When a pattern contains an unlimited repeat inside a subpattern that can itself
+be repeated an unlimited number of times, the use of an atomic group is the
+only way to avoid some failing matches taking a very long time indeed. The
+pattern
.sp
(\eD+|<\ed+>)*[!?]
.sp
@@ -2218,28 +2198,29 @@ sequences of non-digits cannot be broken, and failure happens quickly.
.rs
.sp
Outside a character class, a backslash followed by a digit greater than 0 (and
-possibly further digits) is a backreference to a capture group earlier (that
-is, to its left) in the pattern, provided there have been that many previous
-capture groups.
+possibly further digits) is a backreference to a capturing subpattern earlier
+(that is, to its left) in the pattern, provided there have been that many
+previous capturing left parentheses.
.P
However, if the decimal number following the backslash is less than 8, it is
-always taken as a backreference, and causes an error only if there are not that
-many capture groups in the entire pattern. In other words, the group that is
-referenced need not be to the left of the reference for numbers less than 8. A
-"forward backreference" of this type can make sense when a repetition is
-involved and the group to the right has participated in an earlier iteration.
-.P
-It is not possible to have a numerical "forward backreference" to a group whose
-number is 8 or more using this syntax because a sequence such as \e50 is
+always taken as a backreference, and causes an error only if there are not
+that many capturing left parentheses in the entire pattern. In other words, the
+parentheses that are referenced need not be to the left of the reference for
+numbers less than 8. A "forward backreference" of this type can make sense
+when a repetition is involved and the subpattern to the right has participated
+in an earlier iteration.
+.P
+It is not possible to have a numerical "forward backreference" to a subpattern
+whose number is 8 or more using this syntax because a sequence such as \e50 is
interpreted as a character defined in octal. See the subsection entitled
"Non-printing characters"
.\" HTML <a href="#digitsafterbackslash">
.\" </a>
above
.\"
-for further details of the handling of digits following a backslash. Other
-forms of backreferencing do not suffer from this restriction. In particular,
-there is no problem when named capture groups are used (see below).
+for further details of the handling of digits following a backslash. There is
+no such problem when named parentheses are used. A backreference to any
+subpattern is possible using named parentheses (see below).
.P
Another way of avoiding the ambiguity inherent in the use of digits following a
backslash is to use the \eg escape sequence. This escape must be followed by a
@@ -2256,22 +2237,22 @@ the reference. A signed number is a relative reference. Consider this example:
.sp
(abc(def)ghi)\eg{-1}
.sp
-The sequence \eg{-1} is a reference to the most recently started capture group
-before \eg, that is, is it equivalent to \e2 in this example. Similarly,
-\eg{-2} would be equivalent to \e1. The use of relative references can be
-helpful in long patterns, and also in patterns that are created by joining
-together fragments that contain references within themselves.
+The sequence \eg{-1} is a reference to the most recently started capturing
+subpattern before \eg, that is, it is equivalent to \e2 in this example.
+Similarly, \eg{-2} would be equivalent to \e1. The use of relative references
+can be helpful in long patterns, and also in patterns that are created by
+joining together fragments that contain references within themselves.
.P
-The sequence \eg{+1} is a reference to the next capture group. This kind of
-forward reference can be useful in patterns that repeat. Perl does not support
-the use of + in this way.
+The sequence \eg{+1} is a reference to the next capturing subpattern. This kind
+of forward reference can be useful in patterns that repeat. Perl does not
+support the use of + in this way.
.P
-A backreference matches whatever actually most recently matched the capture
-group in the current subject string, rather than anything at all that matches
-the group (see
-.\" HTML <a href="#groupsassubroutines">
+A backreference matches whatever actually matched the capturing subpattern in
+the current subject string, rather than anything matching the subpattern
+itself (see
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
-"Groups as subroutines"
+"Subpatterns as subroutines"
.\"
below for a way of doing that). So the pattern
.sp
@@ -2284,26 +2265,26 @@ backreference, the case of letters is relevant. For example,
((?i)rah)\es+\e1
.sp
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
-capture group is matched caselessly.
+capturing subpattern is matched caselessly.
.P
-There are several different ways of writing backreferences to named capture
-groups. The .NET syntax \ek{name} and the Perl syntax \ek<name> or \ek'name'
-are supported, as is the Python syntax (?P=name). Perl 5.10's unified
+There are several different ways of writing backreferences to named
+subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or
+\ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
backreference syntax, in which \eg can be used for both numeric and named
-references, is also supported. We could rewrite the above example in any of the
-following ways:
+references, is also supported. We could rewrite the above example in any of
+the following ways:
.sp
(?<p1>(?i)rah)\es+\ek<p1>
(?'p1'(?i)rah)\es+\ek{p1}
(?P<p1>(?i)rah)\es+(?P=p1)
(?<p1>(?i)rah)\es+\eg{p1}
.sp
-A capture group that is referenced by name may appear in the pattern before or
+A subpattern that is referenced by name may appear in the pattern before or
after the reference.
.P
-There may be more than one backreference to the same group. If a group has not
-actually been used in a particular match, backreferences to it always fail by
-default. For example, the pattern
+There may be more than one backreference to the same subpattern. If a
+subpattern has not actually been used in a particular match, any backreferences
+to it always fail by default. For example, the pattern
.sp
(a|(bc))\e2
.sp
@@ -2311,11 +2292,12 @@ always fails if it starts to match "a" rather than "bc". However, if the
PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an
unset value matches an empty string.
.P
-Because there may be many capture groups in a pattern, all digits following a
-backslash are taken as part of a potential backreference number. If the pattern
-continues with a digit character, some delimiter must be used to terminate the
-backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this
-can be white space. Otherwise, the \eg{} syntax or an empty comment (see
+Because there may be many capturing parentheses in a pattern, all digits
+following a backslash are taken as part of a potential backreference number.
+If the pattern continues with a digit character, some delimiter must be used to
+terminate the backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
+option is set, this can be white space. Otherwise, the \eg{} syntax or an empty
+comment (see
.\" HTML <a href="#comments">
.\" </a>
"Comments"
@@ -2326,18 +2308,19 @@ below) can be used.
.SS "Recursive backreferences"
.rs
.sp
-A backreference that occurs inside the group to which it refers fails when the
-group is first used, so, for example, (a\e1) never matches. However, such
-references can be useful inside repeated groups. For example, the pattern
+A backreference that occurs inside the parentheses to which it refers fails
+when the subpattern is first used, so, for example, (a\e1) never matches.
+However, such references can be useful inside repeated subpatterns. For
+example, the pattern
.sp
(a|b\e1)+
.sp
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
-the group, the backreference matches the character string corresponding to the
-previous iteration. In order for this to work, the pattern must be such that
-the first iteration does not need to match the backreference. This can be done
-using alternation, as in the example above, or by a quantifier with a minimum
-of zero.
+the subpattern, the backreference matches the character string corresponding
+to the previous iteration. In order for this to work, the pattern must be such
+that the first iteration does not need to match the backreference. This can be
+done using alternation, as in the example above, or by a quantifier with a
+minimum of zero.
.P
Backreferences of this type cause the group that they reference to be treated
as an
@@ -2361,32 +2344,24 @@ coded as \eb, \eB, \eA, \eG, \eZ, \ez, ^ and $ are described
above.
.\"
.P
-More complicated assertions are coded as parenthesized groups. There are two
-kinds: those that look ahead of the current position in the subject string, and
-those that look behind it, and in each case an assertion may be positive (must
-match for the assertion to be true) or negative (must not match for the
-assertion to be true). An assertion group is matched in the normal way,
-and if it is true, matching continues after it, but with the matching position
-in the subject string is was it was before the assertion was processed.
+More complicated assertions are coded as subpatterns. There are two kinds:
+those that look ahead of the current position in the subject string, and those
+that look behind it, and in each case an assertion may be positive (must
+succeed for matching to continue) or negative (must not succeed for matching to
+continue). An assertion subpattern is matched in the normal way, except that,
+when matching continues after a successful assertion, the matching position in
+the subject string is as it was before the assertion was processed.
.P
-A lookaround assertion may also appear as the condition in a
-.\" HTML <a href="#conditions">
-.\" </a>
-conditional group
-.\"
-(see below). In this case, the result of matching the assertion determines
-which branch of the condition is followed.
-.P
-Assertion groups are not capture groups. If an assertion contains capture
-groups within it, these are counted for the purposes of numbering the capture
-groups in the whole pattern. Within each branch of an assertion, locally
-captured substrings may be referenced in the usual way. For example, a sequence
-such as (.)\eg{-1} can be used to check that two adjacent characters are the
-same.
+Assertion subpatterns are not capturing subpatterns. If an assertion contains
+capturing subpatterns within it, these are counted for the purposes of
+numbering the capturing subpatterns in the whole pattern. Within each branch of
+an assertion, locally captured substrings may be referenced in the usual way.
+For example, a sequence such as (.)\eg{-1} can be used to check that two
+adjacent characters are the same.
.P
When a branch within an assertion fails to match, any substrings that were
captured are discarded (as happens with any pattern branch that fails to
-match). A negative assertion is true only when all its branches fail to match;
+match). A negative assertion succeeds only when all its branches fail to match;
this means that no captured substrings are ever retained after a successful
negative assertion. When an assertion contains a matching branch, what happens
depends on the type of assertion.
@@ -2394,26 +2369,26 @@ depends on the type of assertion.
For a positive assertion, internally captured substrings in the successful
branch are retained, and matching continues with the next pattern item after
the assertion. For a negative assertion, a matching branch means that the
-assertion is not true. If such an assertion is being used as a condition in a
+assertion has failed. If the assertion is being used as a condition in a
.\" HTML <a href="#conditions">
.\" </a>
-conditional group
+conditional subpattern
.\"
(see below), captured substrings are retained, because matching continues with
the "no" branch of the condition. For other failing negative assertions,
control passes to the previous backtracking point, thus discarding any captured
strings within the assertion.
.P
-For compatibility with Perl, most assertion groups may be repeated; though it
-makes no sense to assert the same thing several times, the side effect of
-capturing may occasionally be useful. However, an assertion that forms the
-condition for a conditional group may not be quantified. In practice, for
-other assertions, there only three cases:
+For compatibility with Perl, most assertion subpatterns may be repeated; though
+it makes no sense to assert the same thing several times, the side effect of
+capturing parentheses may occasionally be useful. However, an assertion that
+forms the condition for a conditional subpattern may not be quantified. In
+practice, for other assertions, there are only three cases:
.sp
(1) If the quantifier is {0}, the assertion is never obeyed during matching.
-However, it may contain internal capture groups that are called from elsewhere
-via the
-.\" HTML <a href="#groupsassubroutines">
+However, it may contain internal capturing parenthesized groups that are called
+from elsewhere via the
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutine mechanism.
.\"
@@ -2426,25 +2401,6 @@ without the assertion, the order depending on the greediness of the quantifier.
The assertion is obeyed just once when encountered during matching.
.
.
-.SS "Alphabetic assertion names"
-.rs
-.sp
-Traditionally, symbolic sequences such as (?= and (?<= have been used to specify
-lookaround assertions. Perl 5.28 introduced some experimental alphabetic
-alternatives which might be easier to remember. They all start with (* instead
-of (? and must be written using lower case letters. PCRE2 supports the
-following synonyms:
-.sp
- (*positive_lookahead: or (*pla: is the same as (?=
- (*negative_lookahead: or (*nla: is the same as (?!
- (*positive_lookbehind: or (*plb: is the same as (?<=
- (*negative_lookbehind: or (*nlb: is the same as (?<!
-.sp
-For example, (*pla:foo) is the same assertion as (?=foo). In the following
-sections, the various assertions are described using the original symbolic
-forms.
-.
-.
.SS "Lookahead assertions"
.rs
.sp
@@ -2526,12 +2482,12 @@ because it makes it impossible to calculate the length of the lookbehind. The
\eX and \eR escapes, which can match different numbers of code units, are never
permitted in lookbehinds.
.P
-.\" HTML <a href="#groupsassubroutines">
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
"Subroutine"
.\"
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
-as the called capture group matches a fixed-length string. However,
+as the subpattern matches a fixed-length string. However,
.\" HTML <a href="#recursion">
.\" </a>
recursion,
@@ -2542,10 +2498,10 @@ is not supported.
Perl does not support backreferences in lookbehinds. PCRE2 does support them,
but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option
must not be set, there must be no use of (?| in the pattern (it creates
-duplicate group numbers), and if the backreference is by name, the name
-must be unique. Of course, the referenced group must itself match a fixed
-length substring. The following pattern matches words containing at least two
-characters that begin and end with the same character:
+duplicate subpattern numbers), and if the backreference is by name, the name
+must be unique. Of course, the referenced subpattern must itself be of fixed
+length. The following pattern matches words containing at least two characters
+that begin and end with the same character:
.sp
\eb(\ew)\ew++(?<=\e1)
.P
@@ -2610,81 +2566,14 @@ is another pattern that matches "foo" preceded by three digits and any three
characters that are not "999".
.
.
-.SH "SCRIPT RUNS"
-.rs
-.sp
-In concept, a script run is a sequence of characters that are all from the same
-Unicode script such as Latin or Greek. However, because some scripts are
-commonly used together, and because some diacritical and other marks are used
-with multiple scripts, it is not that simple. There is a full description of
-the rules that PCRE2 uses in the section entitled
-.\" HTML <a href="pcre2unicode.html#scriptruns">
-.\" </a>
-"Script Runs"
-.\"
-in the
-.\" HREF
-\fBpcre2unicode\fP
-.\"
-documentation.
-.P
-If part of a pattern is enclosed between (*script_run: or (*sr: and a closing
-parenthesis, it fails if the sequence of characters that it matches are not a
-script run. After a failure, normal backtracking occurs. Script runs can be
-used to detect spoofing attacks using characters that look the same, but are
-from different scripts. The string "paypal.com" is an infamous example, where
-the letters could be a mixture of Latin and Cyrillic. This pattern ensures that
-the matched characters in a sequence of non-spaces that follow white space are
-a script run:
-.sp
- \es+(*sr:\eS+)
-.sp
-To be sure that they are all from the Latin script (for example), a lookahead
-can be used:
-.sp
- \es+(?=\ep{Latin})(*sr:\eS+)
-.sp
-This works as long as the first character is expected to be a character in that
-script, and not (for example) punctuation, which is allowed with any script. If
-this is not the case, a more creative lookahead is needed. For example, if
-digits, underscore, and dots are permitted at the start:
-.sp
- \es+(?=[0-9_.]*\ep{Latin})(*sr:\eS+)
-.sp
-.P
-In many cases, backtracking into a script run pattern fragment is not
-desirable. The script run can employ an atomic group to prevent this. Because
-this is a common requirement, a shorthand notation is provided by
-(*atomic_script_run: or (*asr:
-.sp
- (*asr:...) is the same as (*sr:(?>...))
-.sp
-Note that the atomic group is inside the script run. Putting it outside would
-not prevent backtracking into the script run pattern.
-.P
-Support for script runs is not available if PCRE2 is compiled without Unicode
-support. A compile-time error is given if any of the above constructs is
-encountered. Script runs are not supported by the alternate matching function,
-\fBpcre2_dfa_match()\fP because they use the same mechanism as capturing
-parentheses.
-.P
-\fBWarning:\fP The (*ACCEPT) control verb
-.\" HTML <a href="#acceptverb">
-.\" </a>
-(see below)
-.\"
-should not be used within a script run group, because it causes an immediate
-exit from the group, bypassing the script run checking.
-.
-.
.\" HTML <a name="conditions"></a>
-.SH "CONDITIONAL GROUPS"
+.SH "CONDITIONAL SUBPATTERNS"
.rs
.sp
-It is possible to cause the matching process to obey a pattern fragment
-conditionally or to choose between two alternative fragments, depending on
-the result of an assertion, or whether a specific capture group has
-already been matched. The two possible forms of conditional group are:
+It is possible to cause the matching process to obey a subpattern
+conditionally or to choose between two alternative subpatterns, depending on
+the result of an assertion, or whether a specific capturing subpattern has
+already been matched. The two possible forms of conditional subpattern are:
.sp
(?(condition)yes-pattern)
(?(condition)yes-pattern|no-pattern)
@@ -2692,36 +2581,38 @@ already been matched. The two possible forms of conditional group are:
If the condition is satisfied, the yes-pattern is used; otherwise the
no-pattern (if present) is used. An absent no-pattern is equivalent to an empty
string (it always matches). If there are more than two alternatives in the
-group, a compile-time error occurs. Each of the two alternatives may itself
-contain nested groups of any form, including conditional groups; the
-restriction to two alternatives applies only at the level of the condition
-itself. This pattern fragment is an example where the alternatives are complex:
+subpattern, a compile-time error occurs. Each of the two alternatives may
+itself contain nested subpatterns of any form, including conditional
+subpatterns; the restriction to two alternatives applies only at the level of
+the condition. This pattern fragment is an example where the alternatives are
+complex:
.sp
(?(1) (A|B|C) | (D | (?(2)E|F) | E) )
.sp
.P
-There are five kinds of condition: references to capture groups, references to
+There are five kinds of condition: references to subpatterns, references to
recursion, two pseudo-conditions called DEFINE and VERSION, and assertions.
.
.
-.SS "Checking for a used capture group by number"
+.SS "Checking for a used subpattern by number"
.rs
.sp
If the text between the parentheses consists of a sequence of digits, the
-condition is true if a capture group of that number has previously matched. If
-there is more than one capture group with the same number (see the earlier
+condition is true if a capturing subpattern of that number has previously
+matched. If there is more than one capturing subpattern with the same number
+(see the earlier
.\"
.\" HTML <a href="#recursion">
.\" </a>
-section about duplicate group numbers),
+section about duplicate subpattern numbers),
.\"
the condition is true if any of them have matched. An alternative notation is
-to precede the digits with a plus or minus sign. In this case, the group number
-is relative rather than absolute. The most recently opened capture group can be
-referenced by (?(-1), the next most recent by (?(-2), and so on. Inside loops
-it can also make sense to refer to subsequent groups. The next capture group
-can be referenced as (?(+1), and so on. (The value zero in any of these forms
-is not used; it provokes a compile-time error.)
+to precede the digits with a plus or minus sign. In this case, the subpattern
+number is relative rather than absolute. The most recently opened parentheses
+can be referenced by (?(-1), the next most recent by (?(-2), and so on. Inside
+loops it can also make sense to refer to subsequent groups. The next
+parentheses to be opened can be referenced as (?(+1), and so on. (The value
+zero in any of these forms is not used; it provokes a compile-time error.)
.P
Consider the following pattern, which contains non-significant white space to
make it more readable (assume the PCRE2_EXTENDED option) and to divide it into
@@ -2732,12 +2623,12 @@ three parts for ease of discussion:
The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The second part
matches one or more characters that are not parentheses. The third part is a
-conditional group that tests whether or not the first capture group
-matched. If it did, that is, if subject started with an opening parenthesis,
+conditional subpattern that tests whether or not the first set of parentheses
+matched. If they did, that is, if the subject started with an opening parenthesis,
the condition is true, and so the yes-pattern is executed and a closing
parenthesis is required. Otherwise, since no-pattern is not present, the
-conditional group matches nothing. In other words, this pattern matches a
-sequence of non-parentheses, optionally enclosed in parentheses.
+subpattern matches nothing. In other words, this pattern matches a sequence of
+non-parentheses, optionally enclosed in parentheses.
.P
If you were embedding this pattern in a larger one, you could use a relative
reference:
@@ -2747,20 +2638,21 @@ reference:
This makes the fragment independent of the parentheses in the larger pattern.
.
.
-.SS "Checking for a used capture group by name"
+.SS "Checking for a used subpattern by name"
.rs
.sp
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
-capture group by name. For compatibility with earlier versions of PCRE1, which
-had this facility before Perl, the syntax (?(name)...) is also recognized.
-Note, however, that undelimited names consisting of the letter R followed by
-digits are ambiguous (see the following section). Rewriting the above example
-to use a named group gives this:
+subpattern by name. For compatibility with earlier versions of PCRE1, which had
+this facility before Perl, the syntax (?(name)...) is also recognized. Note,
+however, that undelimited names consisting of the letter R followed by digits
+are ambiguous (see the following section).
+.P
+Rewriting the above example to use a named subpattern gives this:
.sp
(?<OPEN> \e( )? [^()]+ (?(<OPEN>) \e) )
.sp
If the name used in a condition of this kind is a duplicate, the test is
-applied to all groups of the same name, and is true if any one of them has
+applied to all subpatterns of the same name, and is true if any one of them has
matched.
.
.
@@ -2775,22 +2667,22 @@ sections entitled
"Recursive patterns"
.\"
and
-.\" HTML <a href="#groupsassubroutines">
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
-"Groups as subroutines"
+"Subpatterns as subroutines"
.\"
-below for details of recursion and subroutine calls.
+below for details of recursion and subpattern calls.
.P
-If a condition is the string (R), and there is no capture group with the name
-R, the condition is true if matching is currently in a recursion or subroutine
-call to the whole pattern or any capture group. If digits follow the letter R,
-and there is no group with that name, the condition is true if the most recent
-call is into a group with the given number, which must exist somewhere in the
-overall pattern. This is a contrived example that is equivalent to a+b:
+If a condition is the string (R), and there is no subpattern with the name R,
+the condition is true if matching is currently in a recursion or subroutine
+call to the whole pattern or any subpattern. If digits follow the letter R, and
+there is no subpattern with that name, the condition is true if the most recent
+call is into a subpattern with the given number, which must exist somewhere in
+the overall pattern. This is a contrived example that is equivalent to a+b:
.sp
((?(R1)a+|(?1)b))
.sp
-However, in both cases, if there is a capture group with a matching name, the
+However, in both cases, if there is a subpattern with a matching name, the
condition tests for its being set, as described in the section above, instead
of testing for recursion. For example, creating a group with the name R1 by
adding (?<R1>) to the above pattern completely changes its meaning.
@@ -2799,27 +2691,27 @@ If a name preceded by ampersand follows the letter R, for example:
.sp
(?(R&name)...)
.sp
-the condition is true if the most recent recursion is into a group of that name
-(which must exist within the pattern).
+the condition is true if the most recent recursion is into a subpattern of that
+name (which must exist within the pattern).
.P
This condition does not check the entire recursion stack. It tests only the
current level. If the name used in a condition of this kind is a duplicate, the
-test is applied to all groups of the same name, and is true if any one of
+test is applied to all subpatterns of the same name, and is true if any one of
them is the most recent recursion.
.P
At "top level", all these recursion test conditions are false.
.
.
.\" HTML <a name="subdefine"></a>
-.SS "Defining capture groups for use by reference only"
+.SS "Defining subpatterns for use by reference only"
.rs
.sp
If the condition is the string (DEFINE), the condition is always false, even if
there is a group with the name DEFINE. In this case, there may be only one
-alternative in the rest of the conditional group. It is always skipped if
-control reaches this point in the pattern; the idea of DEFINE is that it can be
-used to define subroutines that can be referenced from elsewhere. (The use of
-.\" HTML <a href="#groupsassubroutines">
+alternative in the subpattern. It is always skipped if control reaches this
+point in the pattern; the idea of DEFINE is that it can be used to define
+subroutines that can be referenced from elsewhere. (The use of
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutines
.\"
@@ -2859,10 +2751,10 @@ than two digits.
.SS "Assertion conditions"
.rs
.sp
-If the condition is not in any of the above formats, it must be a parenthesized
-assertion. This may be a positive or negative lookahead or lookbehind
-assertion. Consider this pattern, again containing non-significant white space,
-and with the two alternatives on the second line:
+If the condition is not in any of the above formats, it must be an assertion.
+This may be a positive or negative lookahead or lookbehind assertion. Consider
+this pattern, again containing non-significant white space, and with the two
+alternatives on the second line:
.sp
(?(?=[^a-z]*[a-z])
\ed{2}-[a-z]{3}-\ed{2} | \ed{2}-\ed{2}-\ed{2} )
@@ -2874,11 +2766,11 @@ subject is matched against the first alternative; otherwise it is matched
against the second. This pattern matches strings in one of the two forms
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
.P
-When an assertion that is a condition contains capture groups, any
+When an assertion that is a condition contains capturing subpatterns, any
capturing that occurs in a matching branch is retained afterwards, for both
positive and negative assertions, because matching always continues after the
assertion, whether it succeeds or fails. (Compare non-conditional assertions,
-for which captures are retained only for positive assertions that succeed.)
+for which captures are retained only for positive assertions that succeed.)
.
.
.\" HTML <a name="comments"></a>
@@ -2888,7 +2780,7 @@ for which captures are retained only for positive assertions that succeed.)
There are two ways of including comments in patterns that are processed by
PCRE2. In both cases, the start of the comment must not be in a character
class, nor in the middle of any other sequence of related characters such as
-(?: or a group name or number. The characters that make up a comment play
+(?: or a subpattern name or number. The characters that make up a comment play
no part in the pattern matching.
.P
The sequence (?# marks the start of a comment that continues up to the next
@@ -2938,13 +2830,13 @@ recursively to the pattern in which it appears.
.P
Obviously, PCRE2 cannot support the interpolation of Perl code. Instead, it
supports special syntax for recursion of the entire pattern, and also for
-individual capture group recursion. After its introduction in PCRE1 and Python,
+individual subpattern recursion. After its introduction in PCRE1 and Python,
this kind of recursion was subsequently introduced into Perl at release 5.10.
.P
A special item that consists of (? followed by a number greater than zero and a
-closing parenthesis is a recursive subroutine call of the capture group of the
-given number, provided that it occurs inside that group. (If not, it is a
-.\" HTML <a href="#groupsassubroutines">
+closing parenthesis is a recursive subroutine call of the subpattern of the
+given number, provided that it occurs inside that subpattern. (If not, it is a
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
non-recursive subroutine
.\"
@@ -2977,26 +2869,26 @@ parentheses preceding the recursion. In other words, a negative number counts
capturing parentheses leftwards from the point at which it is encountered.
.P
Be aware however, that if
-.\" HTML <a href="#dupgroupnumber">
+.\" HTML <a href="#dupsubpatternnumber">
.\" </a>
-duplicate capture group numbers
+duplicate subpattern numbers
.\"
-are in use, relative references refer to the earliest group with the
+are in use, relative references refer to the earliest subpattern with the
appropriate number. Consider, for example:
.sp
(?|(a)|(b)) (c) (?-2)
.sp
-The first two capture groups (a) and (b) are both numbered 1, and group (c)
+The first two capturing groups (a) and (b) are both numbered 1, and group (c)
is number 2. When the reference (?-2) is encountered, the second most recently
opened parentheses has the number 1, but it is the first such group (the (a)
group) to which the recursion refers. This would be the same if an absolute
reference (?1) was used. In other words, relative references are just a
shorthand for computing a group number.
.P
-It is also possible to refer to subsequent capture groups, by writing
+It is also possible to refer to subsequently opened parentheses, by writing
references such as (?+2). However, these cannot be recursive because the
reference is not inside the parentheses that are referenced. They are always
-.\" HTML <a href="#groupsassubroutines">
+.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
non-recursive subroutine
.\"
@@ -3008,7 +2900,7 @@ rewrite the above example as follows:
.sp
(?<pn> \e( ( [^()]++ | (?&pn) )* \e) )
.sp
-If there is more than one group with the same name, the earliest one is
+If there is more than one subpattern with the same name, the earliest one is
used.
.P
The example pattern that we have been looking at contains nested unlimited
@@ -3034,9 +2926,9 @@ documentation). If the pattern above is matched against
(ab(cd)ef)
.sp
the value for the inner capturing parentheses (numbered 2) is "ef", which is
-the last value taken on at the top level. If a capture group is not matched at
-the top level, its final captured value is unset, even if it was (temporarily)
-set at a deeper level during the matching process.
+the last value taken on at the top level. If a capturing subpattern is not
+matched at the top level, its final captured value is unset, even if it was
+(temporarily) set at a deeper level during the matching process.
.P
Do not confuse the (?R) item with the condition (R), which tests for recursion.
Consider this pattern, which matches text in angle brackets, allowing for
@@ -3045,9 +2937,9 @@ recursing), whereas any characters are permitted at the outer level.
.sp
< (?: (?(R) \ed++ | [^<>]*+) | (?R)) * >
.sp
-In this pattern, (?(R) is the start of a conditional group, with two different
-alternatives for the recursive and non-recursive cases. The (?R) item is the
-actual recursive call.
+In this pattern, (?(R) is the start of a conditional subpattern, with two
+different alternatives for the recursive and non-recursive cases. The (?R) item
+is the actual recursive call.
.
.
.\" HTML <a name="recursiondifference"></a>
@@ -3057,7 +2949,7 @@ actual recursive call.
Some former differences between PCRE2 and Perl no longer exist.
.P
Before release 10.30, recursion processing in PCRE2 differed from Perl in that
-a recursive subroutine call was always treated as an atomic group. That is,
+a recursive subpattern call was always treated as an atomic group. That is,
once it had matched some of the subject string, it was never re-entered, even
if it contained untried alternatives and there was a subsequent matching
failure. (Historical note: PCRE implemented recursion before Perl did.)
@@ -3090,7 +2982,7 @@ Perl takes so long that you think it has gone into a loop.
.P
Another way in which PCRE2 and Perl used to differ in their recursion
processing is in the handling of captured values. Formerly in Perl, when a
-group was called recursively or as a subroutine (see the next section), it
+subpattern was called recursively or as a subroutine (see the next section), it
had no access to any values that were captured outside the recursion, whereas
in PCRE2 these values can be referenced. Consider this pattern:
.sp
@@ -3103,16 +2995,17 @@ alternative matches "a" and then recurses. In the recursion, \e1 does now match
later versions (I tried 5.024) it now works.
.
.
-.\" HTML <a name="groupsassubroutines"></a>
-.SH "GROUPS AS SUBROUTINES"
+.\" HTML <a name="subpatternsassubroutines"></a>
+.SH "SUBPATTERNS AS SUBROUTINES"
.rs
.sp
-If the syntax for a recursive group call (either by number or by name) is used
-outside the parentheses to which it refers, it operates a bit like a subroutine
-in a programming language. More accurately, PCRE2 treats the referenced group
-as an independent subpattern which it tries to match at the current matching
-position. The called group may be defined before or after the reference. A
-numbered reference can be absolute or relative, as in these examples:
+If the syntax for a recursive subpattern call (either by number or by
+name) is used outside the parentheses to which it refers, it operates a bit
+like a subroutine in a programming language. More accurately, PCRE2 treats the
+referenced subpattern as an independent subpattern which it tries to match at
+the current matching position. The called subpattern may be defined before or
+after the reference. A numbered reference can be absolute or relative, as in
+these examples:
.sp
(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
@@ -3135,21 +3028,21 @@ changed at PCRE2 release 10.30, so backtracking into subroutine calls can now
occur. However, any capturing parentheses that are set during the subroutine
call revert to their previous values afterwards.
.P
-Processing options such as case-independence are fixed when a group is
+Processing options such as case-independence are fixed when a subpattern is
defined, so if it is used as a subroutine, such options cannot be changed for
different calls. For example, consider this pattern:
.sp
(abc)(?i:(?-1))
.sp
It matches "abcabc". It does not match "abcABC" because the change of
-processing option does not affect the called group.
+processing option does not affect the called subpattern.
.P
The behaviour of
.\" HTML <a href="#backtrackcontrol">
.\" </a>
backtracking control verbs
.\"
-in groups when called as subroutines is described in the section entitled
+in subpatterns when called as subroutines is described in the section entitled
.\" HTML <a href="#btsub">
.\" </a>
"Backtracking verbs in subroutines"
@@ -3163,8 +3056,8 @@ below.
.sp
For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
-syntax for calling a group as a subroutine, possibly recursively. Here are two
-of the examples used above, rewritten using this syntax:
+syntax for referencing a subpattern as a subroutine, possibly recursively. Here
+are two of the examples used above, rewritten using this syntax:
.sp
(?<pn> \e( ( (?>[^()]+) | \eg<pn> )* \e) )
(sens|respons)e and \eg'1'ibility
@@ -3262,7 +3155,6 @@ There are a number of special "Backtracking Control Verbs" (to use Perl's
terminology) that modify the behaviour of backtracking during matching. They
are generally of the form (*VERB) or (*VERB:NAME). Some verbs take either form,
possibly behaving differently depending on whether or not a name is present.
-The names are not required to be unique within the pattern.
.P
By default, for compatibility with Perl, a name is any sequence of characters
that does not include a closing parenthesis. The name is not processed in
@@ -3306,7 +3198,7 @@ assertions,
and in
.\" HTML <a href="#btsub">
.\" </a>
-capture groups called as subroutines
+subpatterns called as subroutines
.\"
(whether or not recursively) is documented below.
.
@@ -3338,7 +3230,6 @@ Experiments with Perl suggest that it too has similar optimizations, and like
PCRE2, turning them off can change the result of a match.
.
.
-.\" HTML <a name="acceptverb"></a>
.SS "Verbs that act immediately"
.rs
.sp
@@ -3347,8 +3238,8 @@ The following verbs act as soon as they are encountered.
(*ACCEPT) or (*ACCEPT:NAME)
.sp
This verb causes the match to end successfully, skipping the remainder of the
-pattern. However, when it is inside a capture group that is called as a
-subroutine, only that group is ended successfully. Matching then continues
+pattern. However, when it is inside a subpattern that is called as a
+subroutine, only that subpattern is ended successfully. Matching then continues
at the outer level. If (*ACCEPT) is triggered in a positive assertion, the
assertion succeeds; in a negative assertion, the assertion fails.
.P
@@ -3359,9 +3250,6 @@ example:
.sp
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
the outer parentheses.
-.P
-\fBWarning:\fP (*ACCEPT) should not be used within a script run group, because
-it causes an immediate exit from the group, bypassing the script run checking.
.sp
(*FAIL) or (*FAIL:NAME)
.sp
@@ -3376,8 +3264,8 @@ nearest equivalent is the callout feature, as for example in this pattern:
A match with the string "aaaa" always fails, but the callout is taken before
each backtrack happens (in this example, 10 times).
.P
-(*ACCEPT:NAME) and (*FAIL:NAME) are treated as (*MARK:NAME)(*ACCEPT) and
-(*MARK:NAME)(*FAIL), respectively.
+(*ACCEPT:NAME) and (*FAIL:NAME) behave exactly the same as
+(*MARK:NAME)(*ACCEPT) and (*MARK:NAME)(*FAIL), respectively.
.
.
.SS "Recording which path was taken"
@@ -3389,10 +3277,10 @@ starting point (see (*SKIP) below).
.sp
(*MARK:NAME) or (*:NAME)
.sp
-A name is always required with this verb. For all the other backtracking
-control verbs, a NAME argument is optional.
+A name is always required with this verb. There may be as many instances of
+(*MARK) as you like in a pattern, and their names do not have to be unique.
.P
-When a match succeeds, the name of the last-encountered mark name on the
+When a match succeeds, the name of the last-encountered (*MARK:NAME) on the
matching path is passed back to the caller as described in the section entitled
.\" HTML <a href="pcre2api.html#matchotherdata">
.\" </a>
@@ -3402,15 +3290,16 @@ in the
.\" HREF
\fBpcre2api\fP
.\"
-documentation. This applies to all instances of (*MARK) and other verbs,
-including those inside assertions and atomic groups. However, there are
-differences in those cases when (*MARK) is used in conjunction with (*SKIP) as
-described below.
+documentation. This applies to all instances of (*MARK), including those inside
+assertions and atomic groups. (There are differences in those cases when
+(*MARK) is used in conjunction with (*SKIP) as described below.)
+.P
+As well as (*MARK), the (*COMMIT), (*PRUNE) and (*THEN) verbs may have
+associated NAME arguments. Whichever is last on the matching path is passed
+back. See below for more details of these other verbs.
.P
-The mark name that was last encountered on the matching path is passed back. A
-verb without a NAME argument is ignored for this purpose. Here is an example of
-\fBpcre2test\fP output, where the "mark" modifier requests the retrieval and
-outputting of (*MARK) data:
+Here is an example of \fBpcre2test\fP output, where the "mark" modifier
+requests the retrieval and outputting of (*MARK) data:
.sp
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XY
@@ -3460,7 +3349,7 @@ to the left of the verb. However, when one of these verbs appears inside an
atomic group or in a lookaround assertion that is true, its effect is confined
to that group, because once the group has been matched, there is never any
backtracking into it. Backtracking from beyond an assertion or an atomic group
-ignores the entire group, and seeks a preceding backtracking point.
+ignores the entire group, and seeks a preceding backtracking point.
.P
These verbs differ in exactly what kind of failure occurs when backtracking
reaches them. The behaviour described below is what happens when the verb is
@@ -3483,8 +3372,8 @@ dynamic anchor, or "I've started, so I must finish."
.P
The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
-caller. However, (*SKIP:NAME) searches only for names that are set with
-(*MARK), ignoring those set by any of the other backtracking verbs.
+caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
.P
If there is more than one backtracking verb in a pattern, a different one that
follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a
@@ -3525,7 +3414,7 @@ as (*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by other backtracking verbs.
+ignoring those set by (*COMMIT), (*PRUNE) or (*THEN).
.sp
(*SKIP)
.sp
@@ -3578,7 +3467,7 @@ never seen because "a" does not match "b", so the matcher immediately jumps to
the second branch of the pattern.
.P
Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores
-names that are set by other backtracking verbs.
+names that are set by (*COMMIT:NAME), (*PRUNE:NAME) or (*THEN:NAME).
.sp
(*THEN) or (*THEN:NAME)
.sp
@@ -3599,30 +3488,32 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by other backtracking verbs.
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
.P
-A group that does not contain a | character is just a part of the enclosing
-alternative; it is not a nested alternation with only one alternative. The
-effect of (*THEN) extends beyond such a group to the enclosing alternative.
-Consider this pattern, where A, B, etc. are complex pattern fragments that do
-not contain any | characters at this level:
+A subpattern that does not contain a | character is just a part of the
+enclosing alternative; it is not a nested alternation with only one
+alternative. The effect of (*THEN) extends beyond such a subpattern to the
+enclosing alternative. Consider this pattern, where A, B, etc. are complex
+pattern fragments that do not contain any | characters at this level:
.sp
A (B(*THEN)C) | D
.sp
If A and B are matched, but there is a failure in C, matching does not
backtrack into A; instead it moves to the next alternative, that is, D.
-However, if the group containing (*THEN) is given an alternative, it
+However, if the subpattern containing (*THEN) is given an alternative, it
behaves differently:
.sp
A (B(*THEN)C | (*FAIL)) | D
.sp
-The effect of (*THEN) is now confined to the inner group. After a failure in C,
-matching moves to (*FAIL), which causes the whole group to fail because there
-are no more alternatives to try. In this case, matching does backtrack into A.
+The effect of (*THEN) is now confined to the inner subpattern. After a failure
+in C, matching moves to (*FAIL), which causes the whole subpattern to fail
+because there are no more alternatives to try. In this case, matching does now
+backtrack into A.
.P
-Note that a conditional group is not considered as having two alternatives,
-because only one is ever used. In other words, the | character in a conditional
-group has a different meaning. Ignoring white space, consider:
+Note that a conditional subpattern is not considered as having two
+alternatives, because only one is ever used. In other words, the | character in
+a conditional subpattern has a different meaning. Ignoring white space,
+consider:
.sp
^.*? (?(?=a) a | b(*THEN)c )
.sp
@@ -3630,7 +3521,7 @@ If the subject is "ba", this pattern does not match. Because .*? is ungreedy,
it initially matches zero characters. The condition (?=a) then fails, the
character "b" is matched, but "c" is not. At this point, matching does not
backtrack to .*? as might perhaps be expected from the presence of the |
-character. The conditional group is part of the single alternative that
+character. The conditional subpattern is part of the single alternative that
comprises the whole pattern, and so the match fails. (If there was a backtrack
into .*?, allowing it to match "b", the match would succeed.)
.P
@@ -3687,13 +3578,13 @@ acts.
(*FAIL) in any assertion has its normal effect: it forces an immediate
backtrack. The behaviour of the other backtracking verbs depends on whether or
not the assertion is standalone or acting as the condition in a conditional
-group.
+subpattern.
.P
(*ACCEPT) in a standalone positive assertion causes the assertion to succeed
-without any further processing; captured strings and a mark name (if set) are
-retained. In a standalone negative assertion, (*ACCEPT) causes the assertion to
-fail without any further processing; captured substrings and any mark name are
-discarded.
+without any further processing; captured strings and a (*MARK) name (if set)
+are retained. In a standalone negative assertion, (*ACCEPT) causes the
+assertion to fail without any further processing; captured substrings and any
+(*MARK) name are discarded.
.P
If the assertion is a condition, (*ACCEPT) causes the condition to be true for
a positive assertion and false for a negative one; captured substrings are
@@ -3722,24 +3613,25 @@ the assertion to be true, without considering any further alternative branches.
.SS "Backtracking verbs in subroutines"
.rs
.sp
-These behaviours occur whether or not the group is called recursively.
+These behaviours occur whether or not the subpattern is called recursively.
.P
-(*ACCEPT) in a group called as a subroutine causes the subroutine match to
+(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to
succeed without any further processing. Matching then continues after the
subroutine call. Perl documents this behaviour. Perl's treatment of the other
verbs in subroutines is different in some cases.
.P
-(*FAIL) in a group called as a subroutine has its normal effect: it forces
+(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
an immediate backtrack.
.P
(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when
-triggered by being backtracked to in a group called as a subroutine. There is
-then a backtrack at the outer level.
+triggered by being backtracked to in a subpattern called as a subroutine. There
+is then a backtrack at the outer level.
.P
(*THEN), when triggered, skips to the next alternative in the innermost
-enclosing group that has alternatives (its normal behaviour). However, if there
-is no such group within the subroutine's group, the subroutine match fails and
-there is a backtrack at the outer level.
+enclosing group within the subpattern that has alternatives (its normal
+behaviour). However, if there is no such group within the subroutine
+subpattern, the subroutine match fails and there is a backtrack at the outer
+level.
.
.
.SH "SEE ALSO"
@@ -3763,6 +3655,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 12 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 04 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2perform.3 b/dist2/doc/pcre2perform.3
index 040369a7..91ca22a9 100644
--- a/dist2/doc/pcre2perform.3
+++ b/dist2/doc/pcre2perform.3
@@ -1,4 +1,4 @@
-.TH PCRE2PERFORM 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2PERFORM 3 "25 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 PERFORMANCE"
@@ -14,9 +14,9 @@ of them.
Patterns are compiled by PCRE2 into a reasonably efficient interpretive code,
so that most simple patterns do not use much memory for storing the compiled
version. However, there is one case where the memory usage of a compiled
-pattern can be unexpectedly large. If a parenthesized group has a quantifier
-with a minimum greater than 1 and/or a limited maximum, the whole group is
-repeated in the compiled code. For example, the pattern
+pattern can be unexpectedly large. If a parenthesized subpattern has a
+quantifier with a minimum greater than 1 and/or a limited maximum, the whole
+subpattern is repeated in the compiled code. For example, the pattern
.sp
(abc|def){2,4}
.sp
@@ -239,6 +239,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 03 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 25 April 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2posix.3 b/dist2/doc/pcre2posix.3
index 35e68e2d..0d8b2c24 100644
--- a/dist2/doc/pcre2posix.3
+++ b/dist2/doc/pcre2posix.3
@@ -1,4 +1,4 @@
-.TH PCRE2POSIX 3 "30 January 2019" "PCRE2 10.33"
+.TH PCRE2POSIX 3 "15 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SYNOPSIS"
@@ -7,60 +7,37 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.B #include <pcre2posix.h>
.PP
.nf
-.B int pcre2_regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
+.B int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
.B " int \fIcflags\fP);"
.sp
-.B int pcre2_regexec(const regex_t *\fIpreg\fP, const char *\fIstring\fP,
+.B int regexec(const regex_t *\fIpreg\fP, const char *\fIstring\fP,
.B " size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);"
.sp
-.B "size_t pcre2_regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,"
+.B "size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,"
.B " char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);"
.sp
-.B void pcre2_regfree(regex_t *\fIpreg\fP);
+.B void regfree(regex_t *\fIpreg\fP);
.fi
.
.SH DESCRIPTION
.rs
.sp
This set of functions provides a POSIX-style API for the PCRE2 regular
-expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit
-and 32-bit libraries. See the
+expression 8-bit library. See the
.\" HREF
\fBpcre2api\fP
.\"
documentation for a description of PCRE2's native API, which contains much
-additional functionality.
+additional functionality. There are no POSIX-style wrappers for PCRE2's 16-bit
+and 32-bit libraries.
.P
-The functions described here are wrapper functions that ultimately call the
-PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header
-file, and they all have unique names starting with \fBpcre2_\fP. However, the
-\fBpcre2posix.h\fP header also contains macro definitions that convert the
-standard POSIX names such \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This
-means that a program can use the usual POSIX names without running the risk of
-accidentally linking with POSIX functions from a different library.
+The functions described here are just wrapper functions that ultimately call
+the PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP
+header file, and on Unix systems the library itself is called
+\fBlibpcre2-posix.a\fP, so can be accessed by adding \fB-lpcre2-posix\fP to the
+command for linking an application that uses them. Because the POSIX functions
+call the native ones, it is also necessary to add \fB-lpcre2-8\fP.
.P
-On Unix-like systems the PCRE2 POSIX library is called \fBlibpcre2-posix\fP, so
-can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an
-application. Because the POSIX functions call the native ones, it is also
-necessary to add \fB-lpcre2-8\fP.
-.P
-Although they are not defined as protypes in \fBpcre2posix.h\fP, the library
-does contain functions with the POSIX names \fBregcomp()\fP etc. These simply
-pass their arguments to the PCRE2 functions. These functions are provided for
-backwards compatibility with earlier versions of PCRE2, so that existing
-programs do not have to be recompiled.
-.P
-Calling the header file \fBpcre2posix.h\fP avoids any conflict with other POSIX
-libraries. It can, of course, be renamed or aliased as \fBregex.h\fP, which is
-the "correct" name, if there is no clash. It provides two structure types,
-\fIregex_t\fP for compiled internal forms, and \fIregmatch_t\fP for returning
-captured substrings. It also defines some constants whose names start with
-"REG_"; these are used for setting options and identifying error codes.
-.
-.
-.SH "USING THE POSIX FUNCTIONS"
-.rs
-.sp
Those POSIX option bits that can reasonably be mapped to PCRE2 native options
have been implemented. In addition, the option REG_EXTENDED is defined with the
value zero. This has no effect, but since programs that are written to the
@@ -78,15 +55,19 @@ described below. "POSIX-like in style" means that the API approximates to the
POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding
domains it is probably even less compatible.
.P
-The descriptions below use the actual names of the functions, but, as described
-above, the standard POSIX names (without the \fBpcre2_\fP prefix) may also be
-used.
+The header for these functions is supplied as \fBpcre2posix.h\fP to avoid any
+potential clash with other POSIX libraries. It can, of course, be renamed or
+aliased as \fBregex.h\fP, which is the "correct" name. It provides two
+structure types, \fIregex_t\fP for compiled internal forms, and
+\fIregmatch_t\fP for returning captured substrings. It also defines some
+constants whose names start with "REG_"; these are used for setting options and
+identifying error codes.
.
.
.SH "COMPILING A PATTERN"
.rs
.sp
-The function \fBpcre2_regcomp()\fP is called to compile a pattern into an
+The function \fBregcomp()\fP is called to compile a pattern into an
internal form. By default, the pattern is a C string terminated by a binary
zero (but see REG_PEND below). The \fIpreg\fP argument is a pointer to a
\fBregex_t\fP structure that is used as a base for storing information about
@@ -123,18 +104,18 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
.sp
REG_NOSUB
.sp
-When a pattern that is compiled with this flag is passed to
-\fBpcre2_regexec()\fP for matching, the \fInmatch\fP and \fIpmatch\fP arguments
-are ignored, and no captured strings are returned. Versions of the PCRE library
-prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this
-no longer happens because it disables the use of backreferences.
+When a pattern that is compiled with this flag is passed to \fBregexec()\fP for
+matching, the \fInmatch\fP and \fIpmatch\fP arguments are ignored, and no
+captured strings are returned. Versions of the PCRE library prior to 10.22 used
+to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens
+because it disables the use of backreferences.
.sp
REG_PEND
.sp
If this option is set, the \fBre_endp\fP field in the \fIpreg\fP structure
(which has the type const char *) must be set to point to the character beyond
-the end of the pattern before calling \fBpcre2_regcomp()\fP. The pattern itself
-may now contain binary zeros, which are treated as data characters. Without
+the end of the pattern before calling \fBregcomp()\fP. The pattern itself may
+now contain binary zeros, which are treated as data characters. Without
REG_PEND, a binary zero terminates the pattern and the \fBre_endp\fP field is
ignored. This is a GNU extension to the POSIX standard and should be used with
caution in software intended to be portable to other systems.
@@ -167,16 +148,15 @@ Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only
newlines are matched by the dot metacharacter (they are not) or by a negative
class such as [^a] (they are).
.P
-The yield of \fBpcre2_regcomp()\fP is zero on success, and non-zero otherwise.
-The \fIpreg\fP structure is filled in on success, and one other member of the
+The yield of \fBregcomp()\fP is zero on success, and non-zero otherwise. The
+\fIpreg\fP structure is filled in on success, and one other member of the
structure (as well as \fIre_endp\fP) is public: \fIre_nsub\fP contains the
number of capturing subpatterns in the regular expression. Various error codes
are defined in the header file.
.P
-NOTE: If the yield of \fBpcre2_regcomp()\fP is non-zero, you must not attempt
-to use the contents of the \fIpreg\fP structure. If, for example, you pass it
-to \fBpcre2_regexec()\fP, the result is undefined and your program is likely to
-crash.
+NOTE: If the yield of \fBregcomp()\fP is non-zero, you must not attempt to
+use the contents of the \fIpreg\fP structure. If, for example, you pass it to
+\fBregexec()\fP, the result is undefined and your program is likely to crash.
.
.
.SH "MATCHING NEWLINE CHARACTERS"
@@ -213,7 +193,7 @@ is no way to stop newline from matching [^a].
Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and
PCRE2_DOLLAR_ENDONLY when calling \fBpcre2_compile()\fP directly, but there is
no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using
-the POSIX API, passing REG_NEWLINE to PCRE2's \fBpcre2_regcomp()\fP function
+the POSIX API, passing REG_NEWLINE to PCRE2's \fBregcomp()\fP function
causes PCRE2_MULTILINE to be passed to \fBpcre2_compile()\fP, and REG_DOTALL
passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY.
.
@@ -221,10 +201,10 @@ passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY.
.SH "MATCHING A PATTERN"
.rs
.sp
-The function \fBpcre2_regexec()\fP is called to match a compiled pattern
-\fIpreg\fP against a given \fIstring\fP, which is by default terminated by a
-zero byte (but see REG_STARTEND below), subject to the options in \fIeflags\fP.
-These can be:
+The function \fBregexec()\fP is called to match a compiled pattern \fIpreg\fP
+against a given \fIstring\fP, which is by default terminated by a zero byte
+(but see REG_STARTEND below), subject to the options in \fIeflags\fP. These can
+be:
.sp
REG_NOTBOL
.sp
@@ -265,7 +245,7 @@ are mutually exclusive; the error REG_INVARG is returned.
.P
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of
-\fBpcre2_regexec()\fP are ignored (except possibly as input for REG_STARTEND).
+\fBregexec()\fP are ignored (except possibly as input for REG_STARTEND).
.P
The value of \fInmatch\fP may be zero, and the value of \fIpmatch\fP may be NULL
(unless REG_STARTEND is set); in both these cases no data about any matched
@@ -288,23 +268,22 @@ header file, of which REG_NOMATCH is the "expected" failure code.
.SH "ERROR MESSAGES"
.rs
.sp
-The \fBpcre2_regerror()\fP function maps a non-zero errorcode from either
-\fBpcre2_regcomp()\fP or \fBpcre2_regexec()\fP to a printable message. If
-\fIpreg\fP is not NULL, the error should have arisen from the use of that
-structure. A message terminated by a binary zero is placed in \fIerrbuf\fP. If
-the buffer is too short, only the first \fIerrbuf_size\fP - 1 characters of the
-error message are used. The yield of the function is the size of buffer needed
-to hold the whole message, including the terminating zero. This value is
-greater than \fIerrbuf_size\fP if the message was truncated.
+The \fBregerror()\fP function maps a non-zero errorcode from either
+\fBregcomp()\fP or \fBregexec()\fP to a printable message. If \fIpreg\fP is not
+NULL, the error should have arisen from the use of that structure. A message
+terminated by a binary zero is placed in \fIerrbuf\fP. If the buffer is too
+short, only the first \fIerrbuf_size\fP - 1 characters of the error message are
+used. The yield of the function is the size of buffer needed to hold the whole
+message, including the terminating zero. This value is greater than
+\fIerrbuf_size\fP if the message was truncated.
.
.
.SH MEMORY USAGE
.rs
.sp
Compiling a regular expression causes memory to be allocated and associated
-with the \fIpreg\fP structure. The function \fBpcre2_regfree()\fP frees all
-such memory, after which \fIpreg\fP may no longer be used as a compiled
-expression.
+with the \fIpreg\fP structure. The function \fBregfree()\fP frees all such
+memory, after which \fIpreg\fP may no longer be used as a compiled expression.
.
.
.SH AUTHOR
@@ -321,6 +300,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 30 January 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 15 June 2017
+Copyright (c) 1997-2017 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2syntax.3 b/dist2/doc/pcre2syntax.3
index 70538e4e..c392bfb0 100644
--- a/dist2/doc/pcre2syntax.3
+++ b/dist2/doc/pcre2syntax.3
@@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "11 February 2019" "PCRE2 10.33"
+.TH PCRE2SYNTAX 3 "02 September 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -22,8 +22,7 @@ documentation. This document contains a quick-reference summary of the syntax.
.SH "ESCAPED CHARACTERS"
.rs
.sp
-This table applies to ASCII and Unicode environments. An unrecognized escape
-sequence causes an error.
+This table applies to ASCII and Unicode environments.
.sp
\ea alarm, that is, the BEL character (hex 07)
\ecx "control-x", where x is any ASCII printing character
@@ -35,24 +34,12 @@ sequence causes an error.
\e0dd character with octal code 0dd
\eddd character with octal code ddd, or backreference
\eo{ddd..} character with octal code ddd..
+ \eU "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
\eN{U+hh..} character with Unicode code point hh.. (Unicode mode only)
+ \euhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
\exhh character with hex code hh
\ex{hh..} character with hex code hh..
.sp
-If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the
-following are also recognized:
-.sp
- \eU the character "U"
- \euhhhh character with hex code hhhh
- \eu{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX
-.sp
-When \ex is not followed by {, from zero to two hexadecimal digits are read,
-but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be
-recognized as a hexadecimal escape; otherwise it matches a literal "x".
-Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits
-or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
-matches a literal "u".
-.P
Note that \e0dd is always an octal code. The treatment of backslash followed by
a non-zero digit is complicated; for details see the section
.\" HTML <a href="pcre2pattern.html#digitsafterbackslash">
@@ -67,6 +54,12 @@ documentation, where details of escape processing in EBCDIC environments are
also given. \eN{U+hh..} is synonymous with \ex{hh..} in PCRE2 but is not
supported in EBCDIC environments. Note that \eN not followed by an opening
curly bracket has a different meaning (see below).
+.P
+When \ex is not followed by {, from zero to two hexadecimal digits are read,
+but if PCRE2_ALT_BSUX is set, \ex must be followed by two hexadecimal digits to
+be recognized as a hexadecimal escape; otherwise it matches a literal "x".
+Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits,
+it matches a literal "u".
.
.
.SH "CHARACTER TYPES"
@@ -405,24 +398,19 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
.SH "CAPTURING"
.rs
.sp
- (...) capture group
- (?<name>...) named capture group (Perl)
- (?'name'...) named capture group (Perl)
- (?P<name>...) named capture group (Python)
- (?:...) non-capture group
- (?|...) non-capture group; reset group numbers for
- capture groups in each alternative
-.sp
-In non-UTF modes, names may contain underscores and ASCII letters and digits;
-in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
-both cases, a name must not start with a digit.
+ (...) capturing group
+ (?<name>...) named capturing group (Perl)
+ (?'name'...) named capturing group (Perl)
+ (?P<name>...) named capturing group (Python)
+ (?:...) non-capturing group
+ (?|...) non-capturing group; reset group numbers for
+ capturing groups in each alternative
.
.
.SH "ATOMIC GROUPS"
.rs
.sp
- (?>...) atomic non-capture group
- (*atomic:...) atomic non-capture group
+ (?>...) atomic, non-capturing group
.
.
.SH "COMMENT"
@@ -450,7 +438,7 @@ of the group.
Unsetting x or xx unsets both. Several options may be set at once, and a
mixture of setting and unsetting such as (?i-x) is allowed, but there may be
only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
-(?^in). An option setting may appear at the start of a non-capture group, for
+(?^in). An option setting may appear at the start of a non-capturing group, for
example (?i:...).
.P
The following are recognized only at the very start of a pattern or after one
@@ -503,33 +491,12 @@ setting with a similar syntax.
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
.rs
.sp
- (?=...) )
- (*pla:...) ) positive lookahead
- (*positive_lookahead:...) )
-.sp
- (?!...) )
- (*nla:...) ) negative lookahead
- (*negative_lookahead:...) )
-.sp
- (?<=...) )
- (*plb:...) ) positive lookbehind
- (*positive_lookbehind:...) )
-.sp
- (?<!...) )
- (*nlb:...) ) negative lookbehind
- (*negative_lookbehind:...) )
-.sp
-Each top-level branch of a lookbehind must be of a fixed length.
-.
-.
-.SH "SCRIPT RUNS"
-.rs
-.sp
- (*script_run:...) ) script run, can be backtracked into
- (*sr:...) )
+ (?=...) positive look ahead
+ (?!...) negative look ahead
+ (?<=...) positive look behind
+ (?<!...) negative look behind
.sp
- (*atomic_script_run:...) ) atomic script run
- (*asr:...) )
+Each top-level branch of a look behind must be of a fixed length.
.
.
.SH "BACKREFERENCES"
@@ -553,19 +520,19 @@ Each top-level branch of a lookbehind must be of a fixed length.
.rs
.sp
(?R) recurse whole pattern
- (?n) call subroutine by absolute number
- (?+n) call subroutine by relative number
- (?-n) call subroutine by relative number
- (?&name) call subroutine by name (Perl)
- (?P>name) call subroutine by name (Python)
- \eg<name> call subroutine by name (Oniguruma)
- \eg'name' call subroutine by name (Oniguruma)
- \eg<n> call subroutine by absolute number (Oniguruma)
- \eg'n' call subroutine by absolute number (Oniguruma)
- \eg<+n> call subroutine by relative number (PCRE2 extension)
- \eg'+n' call subroutine by relative number (PCRE2 extension)
- \eg<-n> call subroutine by relative number (PCRE2 extension)
- \eg'-n' call subroutine by relative number (PCRE2 extension)
+ (?n) call subpattern by absolute number
+ (?+n) call subpattern by relative number
+ (?-n) call subpattern by relative number
+ (?&name) call subpattern by name (Perl)
+ (?P>name) call subpattern by name (Python)
+ \eg<name> call subpattern by name (Oniguruma)
+ \eg'name' call subpattern by name (Oniguruma)
+ \eg<n> call subpattern by absolute number (Oniguruma)
+ \eg'n' call subpattern by absolute number (Oniguruma)
+ \eg<+n> call subpattern by relative number (PCRE2 extension)
+ \eg'+n' call subpattern by relative number (PCRE2 extension)
+ \eg<-n> call subpattern by relative number (PCRE2 extension)
+ \eg'-n' call subpattern by relative number (PCRE2 extension)
.
.
.SH "CONDITIONAL PATTERNS"
@@ -583,7 +550,7 @@ Each top-level branch of a lookbehind must be of a fixed length.
(?(R) overall recursion condition
(?(Rn) specific numbered group recursion condition
(?(R&name) specific named group recursion condition
- (?(DEFINE) define groups for reference
+ (?(DEFINE) define subpattern for reference
(?(VERSION[>]=n.m) test PCRE2 version
(?(assert) assertion condition
.sp
@@ -654,6 +621,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 11 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 02 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2test.1 b/dist2/doc/pcre2test.1
index 568e7054..f590fe16 100644
--- a/dist2/doc/pcre2test.1
+++ b/dist2/doc/pcre2test.1
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "11 March 2019" "PCRE 10.33"
+.TH PCRE2TEST 1 "21 July 2018" "PCRE 10.32"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -565,10 +565,8 @@ for a description of the effects of these options.
/s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
- escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
/x extended set PCRE2_EXTENDED
/xx extended_more set PCRE2_EXTENDED_MORE
- extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
firstline set PCRE2_FIRSTLINE
literal set PCRE2_LITERAL
match_line set PCRE2_EXTRA_MATCH_LINE
@@ -673,14 +671,14 @@ information is obtained from the \fBpcre2_pattern_info()\fP function. Here are
some typical examples:
.sp
re> /(?i)(^a|^b)/m,info
- Capture group count = 1
+ Capturing subpattern count = 1
Compile options: multiline
Overall options: caseless multiline
First code unit at start or follows newline
Subject length lower bound = 1
.sp
re> /(?i)abc/info
- Capture group count = 0
+ Capturing subpattern count = 0
Compile options: <none>
Overall options: caseless
First code unit = 'a' (caseless)
@@ -1005,7 +1003,6 @@ process.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
- allvector show the entire ovector
allusedtext show all consulted text
altglobal alternative global matching
/g global global matching
@@ -1013,11 +1010,8 @@ process.
mark show mark values
replace=<string> specify a replacement string
startchar show starting character when relevant
- substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=<n> skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
.sp
@@ -1160,7 +1154,6 @@ pattern.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
- allvector show the entire ovector
allusedtext show all consulted text (non-JIT only)
altglobal alternative global matching
callout_capture show captures at callout time
@@ -1190,11 +1183,8 @@ pattern.
replace=<string> specify a replacement string
startchar show startchar when relevant
startoffset=<n> same as offset=<n>
- substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=<n> skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
zero_terminate pass the subject as zero-terminated
@@ -1258,28 +1248,10 @@ captured parentheses be output after a match. By default, only those up to the
highest one actually used in the match are output (corresponding to the return
code from \fBpcre2_match()\fP). Groups that did not take part in the match
are output as "<unset>". This modifier is not relevant for DFA matching (which
-does no capturing) and does not apply when \fBreplace\fP is specified; it is
-ignored, with a warning message, if present.
+does no capturing); it is ignored, with a warning message, if present.
.
.
-.SS "Showing the entire ovector, for all outcomes"
-.rs
-.sp
-The \fBallvector\fP modifier requests that the entire ovector be shown,
-whatever the outcome of the match. Compare \fBallcaptures\fP, which shows only
-up to the maximum number of capture groups for the pattern, and then only for a
-successful complete non-DFA match. This modifier, which acts after any match
-result, and also for DFA matching, provides a means of checking that there are
-no unexpected modifications to ovector fields. Before each match attempt, the
-ovector is filled with a special value, and if this is found in both elements
-of a capturing pair, "<unchanged>" is output. After a successful match, this
-applies to all groups after the maximum capture group for the pattern. In other
-cases it applies to the entire ovector. After a partial match, the first two
-elements are the only ones that should be set. After a DFA match, the amount of
-ovector that is used depends on the number of matches that were found.
-.
-.
-.SS "Testing pattern callouts"
+.SS "Testing callouts"
.rs
.sp
A callout function is supplied when \fBpcre2test\fP calls the library matching
@@ -1290,12 +1262,6 @@ controlled by various modifiers listed above whose names begin with
.\" </a>
below.
.\"
-Testing callouts from \fBpcre2_substitute()\fP is described separately in
-"Testing the substitution function"
-.\" HTML <a href="#substitution">
-.\" </a>
-below.
-.\"
.
.
.SS "Finding all matches in a string"
@@ -1326,8 +1292,8 @@ current character is CR followed by LF, an advance of two characters occurs.
.sp
The \fBcopy\fP and \fBget\fP modifiers can be used to test the
\fBpcre2_substring_copy_xxx()\fP and \fBpcre2_substring_get_xxx()\fP functions.
-They can be given more than once, and each can specify a capture group name or
-number, for example:
+They can be given more than once, and each can specify a group name or number,
+for example:
.sp
abcd\e=copy=1,copy=3,get=G1
.sp
@@ -1346,7 +1312,6 @@ parentheses after each substring, followed by the name when the extraction was
by name.
.
.
-.\" HTML <a name="substitution"></a>
.SS "Testing the substitution function"
.rs
.sp
@@ -1399,10 +1364,10 @@ The default action of \fBpcre2_substitute()\fP is to return
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues
-to go through the motions of matching and substituting (but not doing any
-callouts), in order to compute the size of buffer that is required. When this
-happens, \fBpcre2test\fP shows the required buffer length (which includes space
-for the trailing zero) as part of the error message. For example:
+to go through the motions of matching and substituting, in order to compute the
+size of buffer that is required. When this happens, \fBpcre2test\fP shows the
+required buffer length (which includes space for the trailing zero) as part of
+the error message. For example:
.sp
/abc/substitute_overflow_length
123abc123\e=replace=[9]XYZ
@@ -1413,48 +1378,6 @@ matching provokes an error return ("bad option value") from
\fBpcre2_substitute()\fP.
.
.
-.SS "Testing substitute callouts"
-.rs
-.sp
-If the \fBsubstitute_callout\fP modifier is set, a substitution callout
-function is set up. The \fBnull_context\fP modifier must not be set, because
-the address of the callout function is passed in a match context. When the
-callout function is called (after each substitution), details of the input
-and output strings are output. For example:
-.sp
- /abc/g,replace=<$0>,substitute_callout
- abcdefabcpqr
- 1(1) Old 0 3 "abc" New 0 5 "<abc>"
- 2(1) Old 6 9 "abc" New 8 13 "<abc>"
- 2: <abc>def<abc>pqr
-.sp
-The first number on each callout line is the count of matches. The
-parenthesized number is the number of pairs that are set in the ovector (that
-is, one more than the number of capturing groups that were set). Then are
-listed the offsets of the old substring, its contents, and the same for the
-replacement.
-.P
-By default, the substitution callout function returns zero, which accepts the
-replacement and causes matching to continue if /g was used. Two further
-modifiers can be used to test other return values. If \fBsubstitute_skip\fP is
-set to a value greater than zero the callout function returns +1 for the match
-of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the
-replacement to be rejected, and -1 causes no further matching to take place. If
-either of them is set, \fBsubstitute_callout\fP is assumed. For example:
-.sp
- /abc/g,replace=<$0>,substitute_skip=1
- abcdefabcpqr
- 1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED"
- 2(1) Old 6 9 "abc" New 6 11 "<abc>"
- 2: abcdef<abc>pqr
- abcdefabcpqr\e=substitute_stop=1
- 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
- 1: abcdefabcpqr
-.sp
-If both are set for the same number, stop takes precedence. Only a single skip
-or stop is supported, which is sufficient for testing that the feature works.
-.
-.
.SS "Setting the JIT stack size"
.rs
.sp
@@ -1589,11 +1512,11 @@ passing the replacement string as zero-terminated.
.rs
.sp
Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
-\fBpcre2_dfa_match()\fP, \fBpcre2_jit_match()\fP or \fBpcre2_substitute()\fP.
-If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for
-testing that the matching and substitution functions behave correctly in this
-case (they use default values). This modifier cannot be used with the
-\fBfind_limits\fP or \fBsubstitute_callout\fP modifiers.
+\fBpcre2_dfa_match()\fP or \fBpcre2_jit_match()\fP. If the \fBnull_context\fP
+modifier is set, however, NULL is passed. This is for testing that the matching
+functions behave correctly in this case (they use default values). This
+modifier cannot be used with the \fBfind_limits\fP modifier or when testing the
+substitution function.
.
.
.SH "THE ALTERNATIVE MATCHING FUNCTION"
@@ -2059,6 +1982,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 11 March 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 21 July 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/doc/pcre2test.txt b/dist2/doc/pcre2test.txt
index cbe3528a..44727a72 100644
--- a/dist2/doc/pcre2test.txt
+++ b/dist2/doc/pcre2test.txt
@@ -544,10 +544,8 @@ PATTERN MODIFIERS
/s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
- escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
/x extended set PCRE2_EXTENDED
/xx extended_more set PCRE2_EXTENDED_MORE
- extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
firstline set PCRE2_FIRSTLINE
literal set PCRE2_LITERAL
match_line set PCRE2_EXTRA_MATCH_LINE
@@ -647,14 +645,14 @@ PATTERN MODIFIERS
are some typical examples:
re> /(?i)(^a|^b)/m,info
- Capture group count = 1
+ Capturing subpattern count = 1
Compile options: multiline
Overall options: caseless multiline
First code unit at start or follows newline
Subject length lower bound = 1
re> /(?i)abc/info
- Capture group count = 0
+ Capturing subpattern count = 0
Compile options: <none>
Overall options: caseless
First code unit = 'a' (caseless)
@@ -931,7 +929,6 @@ PATTERN MODIFIERS
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
- allvector show the entire ovector
allusedtext show all consulted text
altglobal alternative global matching
/g global global matching
@@ -939,11 +936,8 @@ PATTERN MODIFIERS
mark show mark values
replace=<string> specify a replacement string
startchar show starting character when relevant
- substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=<n> skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
@@ -1063,7 +1057,6 @@ SUBJECT MODIFIERS
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
- allvector show the entire ovector
allusedtext show all consulted text (non-JIT only)
altglobal alternative global matching
callout_capture show captures at callout time
@@ -1093,11 +1086,8 @@ SUBJECT MODIFIERS
replace=<string> specify a replacement string
startchar show startchar when relevant
startoffset=<n> same as offset=<n>
- substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=<n> skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
zero_terminate pass the subject as zero-terminated
@@ -1160,95 +1150,76 @@ SUBJECT MODIFIERS
the highest one actually used in the match are output (corresponding to
the return code from pcre2_match()). Groups that did not take part in
the match are output as "<unset>". This modifier is not relevant for
- DFA matching (which does no capturing) and does not apply when replace
- is specified; it is ignored, with a warning message, if present.
-
- Showing the entire ovector, for all outcomes
-
- The allvector modifier requests that the entire ovector be shown, what-
- ever the outcome of the match. Compare allcaptures, which shows only up
- to the maximum number of capture groups for the pattern, and then only
- for a successful complete non-DFA match. This modifier, which acts
- after any match result, and also for DFA matching, provides a means of
- checking that there are no unexpected modifications to ovector fields.
- Before each match attempt, the ovector is filled with a special value,
- and if this is found in both elements of a capturing pair,
- "<unchanged>" is output. After a successful match, this applies to all
- groups after the maximum capture group for the pattern. In other cases
- it applies to the entire ovector. After a partial match, the first two
- elements are the only ones that should be set. After a DFA match, the
- amount of ovector that is used depends on the number of matches that
- were found.
-
- Testing pattern callouts
-
- A callout function is supplied when pcre2test calls the library match-
- ing functions, unless callout_none is specified. Its behaviour can be
- controlled by various modifiers listed above whose names begin with
- callout_. Details are given in the section entitled "Callouts" below.
- Testing callouts from pcre2_substitute() is described separately in
- "Testing the substitution function" below.
+ DFA matching (which does no capturing); it is ignored, with a warning
+ message, if present.
+
+ Testing callouts
+
+ A callout function is supplied when pcre2test calls the library match-
+ ing functions, unless callout_none is specified. Its behaviour can be
+ controlled by various modifiers listed above whose names begin with
+ callout_. Details are given in the section entitled "Callouts" below.
Finding all matches in a string
Searching for all possible matches within a subject can be requested by
- the global or altglobal modifier. After finding a match, the matching
- function is called again to search the remainder of the subject. The
- difference between global and altglobal is that the former uses the
- start_offset argument to pcre2_match() or pcre2_dfa_match() to start
- searching at a new point within the entire string (which is what Perl
+ the global or altglobal modifier. After finding a match, the matching
+ function is called again to search the remainder of the subject. The
+ difference between global and altglobal is that the former uses the
+ start_offset argument to pcre2_match() or pcre2_dfa_match() to start
+ searching at a new point within the entire string (which is what Perl
does), whereas the latter passes over a shortened subject. This makes a
difference to the matching process if the pattern begins with a lookbe-
hind assertion (including \b or \B).
- If an empty string is matched, the next match is done with the
+ If an empty string is matched, the next match is done with the
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
for another, non-empty, match at the same point in the subject. If this
- match fails, the start offset is advanced, and the normal match is
- retried. This imitates the way Perl handles such cases when using the
- /g modifier or the split() function. Normally, the start offset is
- advanced by one character, but if the newline convention recognizes
- CRLF as a newline, and the current character is CR followed by LF, an
+ match fails, the start offset is advanced, and the normal match is
+ retried. This imitates the way Perl handles such cases when using the
+ /g modifier or the split() function. Normally, the start offset is
+ advanced by one character, but if the newline convention recognizes
+ CRLF as a newline, and the current character is CR followed by LF, an
advance of two characters occurs.
Testing substring extraction functions
- The copy and get modifiers can be used to test the pcre2_sub-
+ The copy and get modifiers can be used to test the pcre2_sub-
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
- given more than once, and each can specify a capture group name or num-
- ber, for example:
+ given more than once, and each can specify a group name or number, for
+ example:
abcd\=copy=1,copy=3,get=G1
- If the #subject command is used to set default copy and/or get lists,
- these can be unset by specifying a negative number to cancel all num-
+ If the #subject command is used to set default copy and/or get lists,
+ these can be unset by specifying a negative number to cancel all num-
bered groups and an empty name to cancel all named groups.
- The getall modifier tests pcre2_substring_list_get(), which extracts
+ The getall modifier tests pcre2_substring_list_get(), which extracts
all captured substrings.
- If the subject line is successfully matched, the substrings extracted
- by the convenience functions are output with C, G, or L after the
- string number instead of a colon. This is in addition to the normal
- full list. The string length (that is, the return from the extraction
+ If the subject line is successfully matched, the substrings extracted
+ by the convenience functions are output with C, G, or L after the
+ string number instead of a colon. This is in addition to the normal
+ full list. The string length (that is, the return from the extraction
function) is given in parentheses after each substring, followed by the
name when the extraction was by name.
Testing the substitution function
- If the replace modifier is set, the pcre2_substitute() function is
- called instead of one of the matching functions. Note that replacement
- strings cannot contain commas, because a comma signifies the end of a
+ If the replace modifier is set, the pcre2_substitute() function is
+ called instead of one of the matching functions. Note that replacement
+ strings cannot contain commas, because a comma signifies the end of a
modifier. This is not thought to be an issue in a test program.
- Unlike subject strings, pcre2test does not process replacement strings
- for escape sequences. In UTF mode, a replacement string is checked to
- see if it is a valid UTF-8 string. If so, it is correctly converted to
- a UTF string of the appropriate code unit width. If it is not a valid
- UTF-8 string, the individual code units are copied directly. This pro-
+ Unlike subject strings, pcre2test does not process replacement strings
+ for escape sequences. In UTF mode, a replacement string is checked to
+ see if it is a valid UTF-8 string. If so, it is correctly converted to
+ a UTF string of the appropriate code unit width. If it is not a valid
+ UTF-8 string, the individual code units are copied directly. This pro-
vides a means of passing an invalid UTF-8 string for testing purposes.
- The following modifiers set options (in addition to the normal match
+ The following modifiers set options (in addition to the normal match
options) for pcre2_substitute():
global PCRE2_SUBSTITUTE_GLOBAL
@@ -1258,8 +1229,8 @@ SUBJECT MODIFIERS
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
- After a successful substitution, the modified string is output, pre-
- ceded by the number of replacements. This may be zero if there were no
+ After a successful substitution, the modified string is output, pre-
+ ceded by the number of replacements. This may be zero if there were no
matches. Here is a simple example of a substitution test:
/abc/replace=xxx
@@ -1268,12 +1239,12 @@ SUBJECT MODIFIERS
=abc=abc=\=global
2: =xxx=xxx=
- Subject and replacement strings should be kept relatively short (fewer
- than 256 characters) for substitution tests, as fixed-size buffers are
- used. To make it easy to test for buffer overflow, if the replacement
- string starts with a number in square brackets, that number is passed
- to pcre2_substitute() as the size of the output buffer, with the
- replacement string starting at the next character. Here is an example
+ Subject and replacement strings should be kept relatively short (fewer
+ than 256 characters) for substitution tests, as fixed-size buffers are
+ used. To make it easy to test for buffer overflow, if the replacement
+ string starts with a number in square brackets, that number is passed
+ to pcre2_substitute() as the size of the output buffer, with the
+ replacement string starting at the next character. Here is an example
that tests the edge case:
/abc/
@@ -1282,15 +1253,14 @@ SUBJECT MODIFIERS
123abc123\=replace=[9]XYZ
Failed: error -47: no more memory
- The default action of pcre2_substitute() is to return
- PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if
- the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub-
- stitute_overflow_length modifier), pcre2_substitute() continues to go
- through the motions of matching and substituting (but not doing any
- callouts), in order to compute the size of buffer that is required.
- When this happens, pcre2test shows the required buffer length (which
- includes space for the trailing zero) as part of the error message. For
- example:
+ The default action of pcre2_substitute() is to return
+ PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if
+ the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub-
+ stitute_overflow_length modifier), pcre2_substitute() continues to go
+ through the motions of matching and substituting, in order to compute
+ the size of buffer that is required. When this happens, pcre2test shows
+ the required buffer length (which includes space for the trailing zero)
+ as part of the error message. For example:
/abc/substitute_overflow_length
123abc123\=replace=[9]XYZ
@@ -1300,204 +1270,162 @@ SUBJECT MODIFIERS
partial matching provokes an error return ("bad option value") from
pcre2_substitute().
- Testing substitute callouts
-
- If the substitute_callout modifier is set, a substitution callout func-
- tion is set up. The null_context modifier must not be set, because the
- address of the callout function is passed in a match context. When the
- callout function is called (after each substitution), details of the
- input and output strings are output. For example:
-
- /abc/g,replace=<$0>,substitute_callout
- abcdefabcpqr
- 1(1) Old 0 3 "abc" New 0 5 "<abc>"
- 2(1) Old 6 9 "abc" New 8 13 "<abc>"
- 2: <abc>def<abc>pqr
-
- The first number on each callout line is the count of matches. The
- parenthesized number is the number of pairs that are set in the ovector
- (that is, one more than the number of capturing groups that were set).
- Then are listed the offsets of the old substring, its contents, and the
- same for the replacement.
-
- By default, the substitution callout function returns zero, which
- accepts the replacement and causes matching to continue if /g was used.
- Two further modifiers can be used to test other return values. If sub-
- stitute_skip is set to a value greater than zero the callout function
- returns +1 for the match of that number, and similarly substitute_stop
- returns -1. These cause the replacement to be rejected, and -1 causes
- no further matching to take place. If either of them is set, substi-
- tute_callout is assumed. For example:
-
- /abc/g,replace=<$0>,substitute_skip=1
- abcdefabcpqr
- 1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED"
- 2(1) Old 6 9 "abc" New 6 11 "<abc>"
- 2: abcdef<abc>pqr
- abcdefabcpqr\=substitute_stop=1
- 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
- 1: abcdefabcpqr
-
- If both are set for the same number, stop takes precedence. Only a sin-
- gle skip or stop is supported, which is sufficient for testing that the
- feature works.
-
Setting the JIT stack size
- The jitstack modifier provides a way of setting the maximum stack size
- that is used by the just-in-time optimization code. It is ignored if
- JIT optimization is not being used. The value is a number of kibibytes
- (units of 1024 bytes). Setting zero reverts to the default of 32KiB.
+ The jitstack modifier provides a way of setting the maximum stack size
+ that is used by the just-in-time optimization code. It is ignored if
+ JIT optimization is not being used. The value is a number of kibibytes
+ (units of 1024 bytes). Setting zero reverts to the default of 32KiB.
Providing a stack that is larger than the default is necessary only for
- very complicated patterns. If jitstack is set non-zero on a subject
+ very complicated patterns. If jitstack is set non-zero on a subject
line it overrides any value that was set on the pattern.
Setting heap, match, and depth limits
- The heap_limit, match_limit, and depth_limit modifiers set the appro-
- priate limits in the match context. These values are ignored when the
+ The heap_limit, match_limit, and depth_limit modifiers set the appro-
+ priate limits in the match context. These values are ignored when the
find_limits modifier is specified.
Finding minimum limits
- If the find_limits modifier is present on a subject line, pcre2test
- calls the relevant matching function several times, setting different
- values in the match context via pcre2_set_heap_limit(),
- pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
- minimum values for each parameter that allows the match to complete
+ If the find_limits modifier is present on a subject line, pcre2test
+ calls the relevant matching function several times, setting different
+ values in the match context via pcre2_set_heap_limit(),
+ pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
+ minimum values for each parameter that allows the match to complete
without error. If JIT is being used, only the match limit is relevant.
When using this modifier, the pattern should not contain any limit set-
- tings such as (*LIMIT_MATCH=...) within it. If such a setting is
+ tings such as (*LIMIT_MATCH=...) within it. If such a setting is
present and is lower than the minimum matching value, the minimum value
- cannot be found because pcre2_set_match_limit() etc. are only able to
+ cannot be found because pcre2_set_match_limit() etc. are only able to
reduce the value of an in-pattern limit; they cannot increase it.
- For non-DFA matching, the minimum depth_limit number is a measure of
+ For non-DFA matching, the minimum depth_limit number is a measure of
how much nested backtracking happens (that is, how deeply the pattern's
- tree is searched). In the case of DFA matching, depth_limit controls
- the depth of recursive calls of the internal function that is used for
+ tree is searched). In the case of DFA matching, depth_limit controls
+ the depth of recursive calls of the internal function that is used for
handling pattern recursion, lookaround assertions, and atomic groups.
For non-DFA matching, the match_limit number is a measure of the amount
of backtracking that takes place, and learning the minimum value can be
- instructive. For most simple matches, the number is quite small, but
- for patterns with very large numbers of matching possibilities, it can
- become large very quickly with increasing length of subject string. In
- the case of DFA matching, match_limit controls the total number of
+ instructive. For most simple matches, the number is quite small, but
+ for patterns with very large numbers of matching possibilities, it can
+ become large very quickly with increasing length of subject string. In
+ the case of DFA matching, match_limit controls the total number of
calls, both recursive and non-recursive, to the internal matching func-
tion, thus controlling the overall amount of computing resource that is
used.
- For both kinds of matching, the heap_limit number, which is in
- kibibytes (units of 1024 bytes), limits the amount of heap memory used
+ For both kinds of matching, the heap_limit number, which is in
+ kibibytes (units of 1024 bytes), limits the amount of heap memory used
for matching. A value of zero disables the use of any heap memory; many
- simple pattern matches can be done without using the heap, so zero is
+ simple pattern matches can be done without using the heap, so zero is
not an unreasonable setting.
Showing MARK names
The mark modifier causes the names from backtracking control verbs that
- are returned from calls to pcre2_match() to be displayed. If a mark is
- returned for a match, non-match, or partial match, pcre2test shows it.
- For a match, it is on a line by itself, tagged with "MK:". Otherwise,
+ are returned from calls to pcre2_match() to be displayed. If a mark is
+ returned for a match, non-match, or partial match, pcre2test shows it.
+ For a match, it is on a line by itself, tagged with "MK:". Otherwise,
it is added to the non-match message.
Showing memory usage
- The memory modifier causes pcre2test to log the sizes of all heap mem-
- ory allocation and freeing calls that occur during a call to
- pcre2_match() or pcre2_dfa_match(). These occur only when a match
- requires a bigger vector than the default for remembering backtracking
- points (pcre2_match()) or for internal workspace (pcre2_dfa_match()).
- In many cases there will be no heap memory used and therefore no addi-
+ The memory modifier causes pcre2test to log the sizes of all heap mem-
+ ory allocation and freeing calls that occur during a call to
+ pcre2_match() or pcre2_dfa_match(). These occur only when a match
+ requires a bigger vector than the default for remembering backtracking
+ points (pcre2_match()) or for internal workspace (pcre2_dfa_match()).
+ In many cases there will be no heap memory used and therefore no addi-
tional output. No heap memory is allocated during matching with JIT, so
- in that case the memory modifier never has any effect. For this modi-
- fier to work, the null_context modifier must not be set on both the
+ in that case the memory modifier never has any effect. For this modi-
+ fier to work, the null_context modifier must not be set on both the
pattern and the subject, though it can be set on one or the other.
Setting a starting offset
- The offset modifier sets an offset in the subject string at which
+ The offset modifier sets an offset in the subject string at which
matching starts. Its value is a number of code units, not characters.
Setting an offset limit
- The offset_limit modifier sets a limit for unanchored matches. If a
+ The offset_limit modifier sets a limit for unanchored matches. If a
match cannot be found starting at or before this offset in the subject,
a "no match" return is given. The data value is a number of code units,
- not characters. When this modifier is used, the use_offset_limit modi-
+ not characters. When this modifier is used, the use_offset_limit modi-
fier must have been set for the pattern; if not, an error is generated.
Setting the size of the output vector
- The ovector modifier applies only to the subject line in which it
- appears, though of course it can also be used to set a default in a
- #subject command. It specifies the number of pairs of offsets that are
+ The ovector modifier applies only to the subject line in which it
+ appears, though of course it can also be used to set a default in a
+ #subject command. It specifies the number of pairs of offsets that are
available for storing matching information. The default is 15.
- A value of zero is useful when testing the POSIX API because it causes
+ A value of zero is useful when testing the POSIX API because it causes
regexec() to be called with a NULL capture vector. When not testing the
- POSIX API, a value of zero is used to cause pcre2_match_data_cre-
- ate_from_pattern() to be called, in order to create a match block of
+ POSIX API, a value of zero is used to cause pcre2_match_data_cre-
+ ate_from_pattern() to be called, in order to create a match block of
exactly the right size for the pattern. (It is not possible to create a
- match block with a zero-length ovector; there is always at least one
+ match block with a zero-length ovector; there is always at least one
pair of offsets.)
Passing the subject as zero-terminated
By default, the subject string is passed to a native API matching func-
tion with its correct length. In order to test the facility for passing
- a zero-terminated string, the zero_terminate modifier is provided. It
- causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching
+ a zero-terminated string, the zero_terminate modifier is provided. It
+ causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching
via the POSIX interface, this modifier is ignored, with a warning.
- When testing pcre2_substitute(), this modifier also has the effect of
+ When testing pcre2_substitute(), this modifier also has the effect of
passing the replacement string as zero-terminated.
Passing a NULL context
- Normally, pcre2test passes a context block to pcre2_match(),
- pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the
- null_context modifier is set, however, NULL is passed. This is for
- testing that the matching and substitution functions behave correctly
- in this case (they use default values). This modifier cannot be used
- with the find_limits or substitute_callout modifiers.
+ Normally, pcre2test passes a context block to pcre2_match(),
+ pcre2_dfa_match() or pcre2_jit_match(). If the null_context modifier is
+ set, however, NULL is passed. This is for testing that the matching
+ functions behave correctly in this case (they use default values). This
+ modifier cannot be used with the find_limits modifier or when testing
+ the substitution function.
THE ALTERNATIVE MATCHING FUNCTION
- By default, pcre2test uses the standard PCRE2 matching function,
+ By default, pcre2test uses the standard PCRE2 matching function,
pcre2_match() to match each subject line. PCRE2 also supports an alter-
- native matching function, pcre2_dfa_match(), which operates in a dif-
- ferent way, and has some restrictions. The differences between the two
+ native matching function, pcre2_dfa_match(), which operates in a dif-
+ ferent way, and has some restrictions. The differences between the two
functions are described in the pcre2matching documentation.
- If the dfa modifier is set, the alternative matching function is used.
- This function finds all possible matches at a given point in the sub-
- ject. If, however, the dfa_shortest modifier is set, processing stops
- after the first match is found. This is always the shortest possible
+ If the dfa modifier is set, the alternative matching function is used.
+ This function finds all possible matches at a given point in the sub-
+ ject. If, however, the dfa_shortest modifier is set, processing stops
+ after the first match is found. This is always the shortest possible
match.
DEFAULT OUTPUT FROM pcre2test
- This section describes the output when the normal matching function,
+ This section describes the output when the normal matching function,
pcre2_match(), is being used.
- When a match succeeds, pcre2test outputs the list of captured sub-
- strings, starting with number 0 for the string that matched the whole
- pattern. Otherwise, it outputs "No match" when the return is
- PCRE2_ERROR_NOMATCH, or "Partial match:" followed by the partially
- matching substring when the return is PCRE2_ERROR_PARTIAL. (Note that
- this is the entire substring that was inspected during the partial
- match; it may include characters before the actual match start if a
+ When a match succeeds, pcre2test outputs the list of captured sub-
+ strings, starting with number 0 for the string that matched the whole
+ pattern. Otherwise, it outputs "No match" when the return is
+ PCRE2_ERROR_NOMATCH, or "Partial match:" followed by the partially
+ matching substring when the return is PCRE2_ERROR_PARTIAL. (Note that
+ this is the entire substring that was inspected during the partial
+ match; it may include characters before the actual match start if a
lookbehind assertion, \K, \b, or \B was involved.)
For any other return, pcre2test outputs the PCRE2 negative error number
- and a short descriptive phrase. If the error is a failed UTF string
- check, the code unit offset of the start of the failing character is
+ and a short descriptive phrase. If the error is a failed UTF string
+ check, the code unit offset of the start of the failing character is
also output. Here is an example of an interactive pcre2test run.
$ pcre2test
@@ -1513,8 +1441,8 @@ DEFAULT OUTPUT FROM pcre2test
Unset capturing substrings that are not followed by one that is set are
not shown by pcre2test unless the allcaptures modifier is specified. In
the following example, there are two capturing substrings, but when the
- first data line is matched, the second, unset substring is not shown.
- An "internal" unset substring is shown as "<unset>", as for the second
+ first data line is matched, the second, unset substring is not shown.
+ An "internal" unset substring is shown as "<unset>", as for the second
data line.
re> /(a)|(b)/
@@ -1526,11 +1454,11 @@ DEFAULT OUTPUT FROM pcre2test
1: <unset>
2: b
- If the strings contain any non-printing characters, they are output as
- \xhh escapes if the value is less than 256 and UTF mode is not set.
+ If the strings contain any non-printing characters, they are output as
+ \xhh escapes if the value is less than 256 and UTF mode is not set.
Otherwise they are output as \x{hh...} escapes. See below for the defi-
- nition of non-printing characters. If the aftertext modifier is set,
- the output for substring 0 is followed by the rest of the subject
+ nition of non-printing characters. If the aftertext modifier is set,
+ the output for substring 0 is followed by the rest of the subject
string, identified by "0+" like this:
re> /cat/aftertext
@@ -1538,7 +1466,7 @@ DEFAULT OUTPUT FROM pcre2test
0: cat
0+ aract
- If global matching is requested, the results of successive matching
+ If global matching is requested, the results of successive matching
attempts are output in sequence, like this:
re> /\Bi(\w\w)/g
@@ -1550,8 +1478,8 @@ DEFAULT OUTPUT FROM pcre2test
0: ipp
1: pp
- "No match" is output only if the first match attempt fails. Here is an
- example of a failure message (the offset 4 that is specified by the
+ "No match" is output only if the first match attempt fails. Here is an
+ example of a failure message (the offset 4 that is specified by the
offset modifier is past the end of the subject string):
re> /xyz/
@@ -1559,7 +1487,7 @@ DEFAULT OUTPUT FROM pcre2test
Error -24 (bad offset value)
Note that whereas patterns can be continued over several lines (a plain
- ">" prompt is used for continuations), subject lines may not. However,
+ ">" prompt is used for continuations), subject lines may not. However,
newlines can be included in a subject by means of the \n escape (or \r,
\r\n, etc., depending on the newline sequence setting).
@@ -1567,7 +1495,7 @@ DEFAULT OUTPUT FROM pcre2test
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
When the alternative matching function, pcre2_dfa_match(), is used, the
- output consists of a list of all the matches that start at the first
+ output consists of a list of all the matches that start at the first
point in the subject where there is at least one match. For example:
re> /(tang|tangerine|tan)/
@@ -1576,11 +1504,11 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
1: tang
2: tan
- Using the normal matching function on this data finds only "tang". The
- longest matching string is always given first (and numbered zero).
- After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
- followed by the partially matching substring. Note that this is the
- entire substring that was inspected during the partial match; it may
+ Using the normal matching function on this data finds only "tang". The
+ longest matching string is always given first (and numbered zero).
+ After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
+ followed by the partially matching substring. Note that this is the
+ entire substring that was inspected during the partial match; it may
include characters before the actual match start if a lookbehind asser-
tion, \b, or \B was involved. (\K is not supported for DFA matching.)
@@ -1596,16 +1524,16 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
1: tan
0: tan
- The alternative matching function does not support substring capture,
- so the modifiers that are concerned with captured substrings are not
+ The alternative matching function does not support substring capture,
+ so the modifiers that are concerned with captured substrings are not
relevant.
RESTARTING AFTER A PARTIAL MATCH
- When the alternative matching function has given the PCRE2_ERROR_PAR-
+ When the alternative matching function has given the PCRE2_ERROR_PAR-
TIAL return, indicating that the subject partially matched the pattern,
- you can restart the match with additional subject data by means of the
+ you can restart the match with additional subject data by means of the
dfa_restart modifier. For example:
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
@@ -1614,37 +1542,37 @@ RESTARTING AFTER A PARTIAL MATCH
data> n05\=dfa,dfa_restart
0: n05
- For further information about partial matching, see the pcre2partial
+ For further information about partial matching, see the pcre2partial
documentation.
CALLOUTS
If the pattern contains any callout requests, pcre2test's callout func-
- tion is called during matching unless callout_none is specified. This
+ tion is called during matching unless callout_none is specified. This
works with both matching functions, and with JIT, though there are some
- differences in behaviour. The output for callouts with numerical argu-
+ differences in behaviour. The output for callouts with numerical argu-
ments and those with string arguments is slightly different.
Callouts with numerical arguments
By default, the callout function displays the callout number, the start
- and current positions in the subject text at the callout time, and the
+ and current positions in the subject text at the callout time, and the
next pattern item to be tested. For example:
--->pqrabcdef
0 ^ ^ \d
- This output indicates that callout number 0 occurred for a match
- attempt starting at the fourth character of the subject string, when
- the pointer was at the seventh character, and when the next pattern
- item was \d. Just one circumflex is output if the start and current
- positions are the same, or if the current position precedes the start
+ This output indicates that callout number 0 occurred for a match
+ attempt starting at the fourth character of the subject string, when
+ the pointer was at the seventh character, and when the next pattern
+ item was \d. Just one circumflex is output if the start and current
+ positions are the same, or if the current position precedes the start
position, which can happen if the callout is in a lookbehind assertion.
Callouts numbered 255 are assumed to be automatic callouts, inserted as
a result of the auto_callout pattern modifier. In this case, instead of
- showing the callout number, the offset in the pattern, preceded by a
+ showing the callout number, the offset in the pattern, preceded by a
plus, is output. For example:
re> /\d?[A-E]\*/auto_callout
@@ -1657,7 +1585,7 @@ CALLOUTS
0: E*
If a pattern contains (*MARK) items, an additional line is output when-
- ever a change of latest mark is passed to the callout function. For
+ ever a change of latest mark is passed to the callout function. For
example:
re> /a(*MARK:X)bc/auto_callout
@@ -1671,17 +1599,17 @@ CALLOUTS
+12 ^ ^
0: abc
- The mark changes between matching "a" and "b", but stays the same for
- the rest of the match, so nothing more is output. If, as a result of
- backtracking, the mark reverts to being unset, the text "<unset>" is
+ The mark changes between matching "a" and "b", but stays the same for
+ the rest of the match, so nothing more is output. If, as a result of
+ backtracking, the mark reverts to being unset, the text "<unset>" is
output.
Callouts with string arguments
The output for a callout with a string argument is similar, except that
- instead of outputting a callout number before the position indicators,
- the callout string and its offset in the pattern string are output
- before the reflection of the subject string, and the subject string is
+ instead of outputting a callout number before the position indicators,
+ the callout string and its offset in the pattern string are output
+ before the reflection of the subject string, and the subject string is
reflected for each callout. For example:
re> /^ab(?C'first')cd(?C"second")ef/
@@ -1697,26 +1625,26 @@ CALLOUTS
Callout modifiers
- The callout function in pcre2test returns zero (carry on matching) by
- default, but you can use a callout_fail modifier in a subject line to
+ The callout function in pcre2test returns zero (carry on matching) by
+ default, but you can use a callout_fail modifier in a subject line to
change this and other parameters of the callout (see below).
If the callout_capture modifier is set, the current captured groups are
output when a callout occurs. This is useful only for non-DFA matching,
- as pcre2_dfa_match() does not support capturing, so no captures are
+ as pcre2_dfa_match() does not support capturing, so no captures are
ever shown.
The normal callout output, showing the callout number or pattern offset
- (as described above) is suppressed if the callout_no_where modifier is
+ (as described above) is suppressed if the callout_no_where modifier is
set.
- When using the interpretive matching function pcre2_match() without
- JIT, setting the callout_extra modifier causes additional output from
- pcre2test's callout function to be generated. For the first callout in
- a match attempt at a new starting position in the subject, "New match
- attempt" is output. If there has been a backtrack since the last call-
+ When using the interpretive matching function pcre2_match() without
+ JIT, setting the callout_extra modifier causes additional output from
+ pcre2test's callout function to be generated. For the first callout in
+ a match attempt at a new starting position in the subject, "New match
+ attempt" is output. If there has been a backtrack since the last call-
out (or start of matching if this is the first callout), "Backtrack" is
- output, followed by "No other matching paths" if the backtrack ended
+ output, followed by "No other matching paths" if the backtrack ended
the previous match attempt. For example:
re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
@@ -1753,86 +1681,86 @@ CALLOUTS
+1 ^ a+
No match
- Notice that various optimizations must be turned off if you want all
- possible matching paths to be scanned. If no_start_optimize is not
- used, there is an immediate "no match", without any callouts, because
- the starting optimization fails to find "b" in the subject, which it
- knows must be present for any match. If no_auto_possess is not used,
- the "a+" item is turned into "a++", which reduces the number of back-
+ Notice that various optimizations must be turned off if you want all
+ possible matching paths to be scanned. If no_start_optimize is not
+ used, there is an immediate "no match", without any callouts, because
+ the starting optimization fails to find "b" in the subject, which it
+ knows must be present for any match. If no_auto_possess is not used,
+ the "a+" item is turned into "a++", which reduces the number of back-
tracks.
- The callout_extra modifier has no effect if used with the DFA matching
+ The callout_extra modifier has no effect if used with the DFA matching
function, or with JIT.
Return values from callouts
- The default return from the callout function is zero, which allows
+ The default return from the callout function is zero, which allows
matching to continue. The callout_fail modifier can be given one or two
numbers. If there is only one number, 1 is returned instead of 0 (caus-
ing matching to backtrack) when a callout of that number is reached. If
- two numbers (<n>:<m>) are given, 1 is returned when callout <n> is
- reached and there have been at least <m> callouts. The callout_error
+ two numbers (<n>:<m>) are given, 1 is returned when callout <n> is
+ reached and there have been at least <m> callouts. The callout_error
modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus-
- ing the entire matching process to be aborted. If both these modifiers
- are set for the same callout number, callout_error takes precedence.
- Note that callouts with string arguments are always given the number
+ ing the entire matching process to be aborted. If both these modifiers
+ are set for the same callout number, callout_error takes precedence.
+ Note that callouts with string arguments are always given the number
zero.
- The callout_data modifier can be given an unsigned or a negative num-
- ber. This is set as the "user data" that is passed to the matching
- function, and passed back when the callout function is invoked. Any
- value other than zero is used as a return from pcre2test's callout
+ The callout_data modifier can be given an unsigned or a negative num-
+ ber. This is set as the "user data" that is passed to the matching
+ function, and passed back when the callout function is invoked. Any
+ value other than zero is used as a return from pcre2test's callout
function.
Inserting callouts can be helpful when using pcre2test to check compli-
- cated regular expressions. For further information about callouts, see
+ cated regular expressions. For further information about callouts, see
the pcre2callout documentation.
NON-PRINTING CHARACTERS
When pcre2test is outputting text in the compiled version of a pattern,
- bytes other than 32-126 are always treated as non-printing characters
+ bytes other than 32-126 are always treated as non-printing characters
and are therefore shown as hex escapes.
- When pcre2test is outputting text that is a matched part of a subject
- string, it behaves in the same way, unless a different locale has been
- set for the pattern (using the locale modifier). In this case, the
- isprint() function is used to distinguish printing and non-printing
+ When pcre2test is outputting text that is a matched part of a subject
+ string, it behaves in the same way, unless a different locale has been
+ set for the pattern (using the locale modifier). In this case, the
+ isprint() function is used to distinguish printing and non-printing
characters.
SAVING AND RESTORING COMPILED PATTERNS
- It is possible to save compiled patterns on disc or elsewhere, and
+ It is possible to save compiled patterns on disc or elsewhere, and
reload them later, subject to a number of restrictions. JIT data cannot
- be saved. The host on which the patterns are reloaded must be running
+ be saved. The host on which the patterns are reloaded must be running
the same version of PCRE2, with the same code unit width, and must also
- have the same endianness, pointer width and PCRE2_SIZE type. Before
- compiled patterns can be saved they must be serialized, that is, con-
- verted to a stream of bytes. A single byte stream may contain any num-
- ber of compiled patterns, but they must all use the same character
+ have the same endianness, pointer width and PCRE2_SIZE type. Before
+ compiled patterns can be saved they must be serialized, that is, con-
+ verted to a stream of bytes. A single byte stream may contain any num-
+ ber of compiled patterns, but they must all use the same character
tables. A single copy of the tables is included in the byte stream (its
size is 1088 bytes).
- The functions whose names begin with pcre2_serialize_ are used for
- serializing and de-serializing. They are described in the pcre2serial-
+ The functions whose names begin with pcre2_serialize_ are used for
+ serializing and de-serializing. They are described in the pcre2serial-
ize documentation. In this section we describe the features of
pcre2test that can be used to test these functions.
- Note that "serialization" in PCRE2 does not convert compiled patterns
- to an abstract format like Java or .NET. It just makes a reloadable
+ Note that "serialization" in PCRE2 does not convert compiled patterns
+ to an abstract format like Java or .NET. It just makes a reloadable
byte code stream. Hence the restrictions on reloading mentioned above.
- In pcre2test, when a pattern with the push modifier is successfully com-
- piled, it is pushed onto a stack of compiled patterns, and pcre2test
- expects the next line to contain a new pattern (or command) instead of
+ In pcre2test, when a pattern with the push modifier is successfully com-
+ piled, it is pushed onto a stack of compiled patterns, and pcre2test
+ expects the next line to contain a new pattern (or command) instead of
a subject line. By contrast, the pushcopy modifier causes a copy of the
- compiled pattern to be stacked, leaving the original available for
+ compiled pattern to be stacked, leaving the original available for
immediate matching. By using push and/or pushcopy, a number of patterns
- can be compiled and retained. These modifiers are incompatible with
+ can be compiled and retained. These modifiers are incompatible with
posix, and control modifiers that act at match time are ignored (with a
- message) for the stacked patterns. The jitverify modifier applies only
+ message) for the stacked patterns. The jitverify modifier applies only
at compile time.
The command
@@ -1840,21 +1768,21 @@ SAVING AND RESTORING COMPILED PATTERNS
#save <filename>
causes all the stacked patterns to be serialized and the result written
- to the named file. Afterwards, all the stacked patterns are freed. The
+ to the named file. Afterwards, all the stacked patterns are freed. The
command
#load <filename>
- reads the data in the file, and then arranges for it to be de-serial-
- ized, with the resulting compiled patterns added to the pattern stack.
- The pattern on the top of the stack can be retrieved by the #pop com-
- mand, which must be followed by lines of subjects that are to be
- matched with the pattern, terminated as usual by an empty line or end
- of file. This command may be followed by a modifier list containing
- only control modifiers that act after a pattern has been compiled. In
+ reads the data in the file, and then arranges for it to be de-serial-
+ ized, with the resulting compiled patterns added to the pattern stack.
+ The pattern on the top of the stack can be retrieved by the #pop com-
+ mand, which must be followed by lines of subjects that are to be
+ matched with the pattern, terminated as usual by an empty line or end
+ of file. This command may be followed by a modifier list containing
+ only control modifiers that act after a pattern has been compiled. In
particular, hex, posix, posix_nosub, push, and pushcopy are not
- allowed, nor are any option-setting modifiers. The JIT modifiers are,
- however permitted. Here is an example that saves and reloads two pat-
+ allowed, nor are any option-setting modifiers. The JIT modifiers are,
+ however permitted. Here is an example that saves and reloads two pat-
terns.
/abc/push
@@ -1867,10 +1795,10 @@ SAVING AND RESTORING COMPILED PATTERNS
#pop jit,bincode
abc
- If jitverify is used with #pop, it does not automatically imply jit,
+ If jitverify is used with #pop, it does not automatically imply jit,
which is different behaviour from when it is used on a pattern.
- The #popcopy command is analogous to the pushcopy modifier in that it
+ The #popcopy command is analogous to the pushcopy modifier in that it
makes current a copy of the topmost stack pattern, leaving the original
still on the stack.
@@ -1890,5 +1818,5 @@ AUTHOR
REVISION
- Last updated: 11 March 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 21 July 2018
+ Copyright (c) 1997-2018 University of Cambridge.
diff --git a/dist2/doc/pcre2unicode.3 b/dist2/doc/pcre2unicode.3
index 56eb1eae..877d8878 100644
--- a/dist2/doc/pcre2unicode.3
+++ b/dist2/doc/pcre2unicode.3
@@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "06 March 2019" "PCRE2 10.33"
+.TH PCRE2UNICODE 3 "02 September 2018" "PCRE2 10.32"
.SH NAME
PCRE - Perl-compatible regular expressions (revised API)
.SH "UNICODE AND UTF SUPPORT"
@@ -27,11 +27,10 @@ case the library will be smaller.
.rs
.sp
When PCRE2 is built with Unicode support, the escape sequences \ep{..},
-\eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+\eP{..}, and \eX can be used. The Unicode properties that can be tested are
+limited to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
+the derived properties Any and L&. Full lists are given in the
.\" HREF
\fBpcre2pattern\fP
.\"
@@ -63,18 +62,13 @@ individual code units.
In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit.
.P
-In UTF modes, capture group names are not restricted to ASCII, and may contain
-any Unicode letters and decimal digits, as well as underscore.
-.P
The escape sequence \eC can be used to match a single code unit in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \eC in the
.\" HREF
\fBpcre2pattern\fP
.\"
-documentation). For this reason, there is a build-time option that disables
-support for \eC completely. There is also a less draconian compile-time option
-for locking out the use of \eC when a pattern is compiled.
+documentation).
.P
The use of \eC is not supported by the alternative matching function
\fBpcre2_dfa_match()\fP when in UTF-8 or UTF-16 mode, that is, when a character
@@ -124,152 +118,27 @@ few Unicode characters such as Greek sigma have more than two code points that
are case-equivalent, and these are treated as such.
.
.
-.\" HTML <a name="scriptruns"></a>
-.SH "SCRIPT RUNS"
-.rs
-.sp
-The pattern constructs (*script_run:...) and (*atomic_script_run:...), with
-synonyms (*sr:...) and (*asr:...), verify that the string matched within the
-parentheses is a script run. In concept, a script run is a sequence of
-characters that are all from the same Unicode script. However, because some
-scripts are commonly used together, and because some diacritical and other
-marks are used with multiple scripts, it is not that simple.
-.P
-Every Unicode character has a Script property, mostly with a value
-corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
-are also three special values:
-.P
-"Unknown" is used for code points that have not been assigned, and also for the
-surrogate code points. In the PCRE2 32-bit library, characters whose code
-points are greater than the Unicode maximum (U+10FFFF), which are accessible
-only in non-UTF mode, are assigned the Unknown script.
-.P
-"Common" is used for characters that are used with many scripts. These include
-punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII
-digits 0 to 9.
-.P
-"Inherited" is used for characters such as diacritical marks that modify a
-previous character. These are considered to take on the script of the character
-that they modify.
-.P
-Some Inherited characters are used with many scripts, but many of them are only
-normally used with a small number of scripts. For example, U+102E0 (Coptic
-Epact thousands mark) is used only with Arabic and Coptic. In order to make it
-possible to check this, a Unicode property called Script Extension exists. Its
-value is a list of scripts that apply to the character. For the majority of
-characters, the list contains just one script, the same one as the Script
-property. However, for characters such as U+102E0 more than one Script is
-listed. There are also some Common characters that have a single, non-Common
-script in their Script Extension list.
-.P
-The next section describes the basic rules for deciding whether a given string
-of characters is a script run. Note, however, that there are some special cases
-involving the Chinese Han script, and an additional constraint for decimal
-digits. These are covered in subsequent sections.
-.
-.
-.SS "Basic script run rules"
-.rs
-.sp
-A string that is less than two characters long is a script run. This is the
-only case in which an Unknown character can be part of a script run. Longer
-strings are checked using only the Script Extensions property, not the basic
-Script property.
-.P
-If a character's Script Extension property is the single value "Inherited", it
-is always accepted as part of a script run. This is also true for the property
-"Common", subject to the checking of decimal digits described below. All the
-remaining characters in a script run must have at least one script in common in
-their Script Extension lists. In set-theoretic terminology, the intersection of
-all the sets of scripts must not be empty.
-.P
-A simple example is an Internet name such as "google.com". The letters are all
-in the Latin script, and the dot is Common, so this string is a script run.
-However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
-string that looks the same, but with Cyrillic "o"s is not a script run.
-.P
-More interesting examples involve characters with more than one script in their
-Script Extension. Consider the following characters:
-.sp
- U+060C Arabic comma
- U+06D4 Arabic full stop
-.sp
-The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
-Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
-appear in script runs of either Arabic or Hanifi Rohingya. The first could also
-appear in Syriac or Thaana script runs, but the second could not.
-.
-.
-.SS "The Chinese Han script"
-.rs
-.sp
-The Chinese Han script is commonly used in conjunction with other scripts for
-writing certain languages. Japanese uses the Hiragana and Katakana scripts
-together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
-and Han. These three combinations are treated as special cases when checking
-script runs and are, in effect, "virtual scripts". Thus, a script run may
-contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and
-Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of
-Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical
-Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/)
-in allowing such mixtures.
-.
-.
-.SS "Decimal digits"
-.rs
-.sp
-Unicode contains many sets of 10 decimal digits in different scripts, and some
-scripts (including the Common script) contain more than one set. Some of these
-decimal digits are visually indistinguishable from the common ASCII
-digits. In addition to the script checking described above, if a script run
-contains any decimal digits, they must all come from the same set of 10
-adjacent characters.
-.
-.
.SH "VALIDITY OF UTF STRINGS"
.rs
.sp
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
-are (by default) checked for validity on entry to the relevant functions. If an
-invalid UTF string is passed, a negative error code is returned. The code unit
-offset to the offending character can be extracted from the match data block by
-calling \fBpcre2_get_startchar()\fP, which is used for this purpose after a UTF
-error.
-.P
-In some situations, you may already know that your strings are valid, and
-therefore want to skip these checks in order to improve performance, for
-example in the case of a long subject string that is being scanned repeatedly.
-If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time,
-PCRE2 assumes that the pattern or subject it is given (respectively) contains
-only valid UTF code unit sequences.
-.P
-If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
-is usually undefined and your program may crash or loop indefinitely. There is,
-however, one mode of matching that can handle invalid UTF subject strings. This
-is matching via the JIT optimization using the PCRE2_JIT_INVALID_UTF option
-when calling \fBpcre2_jit_compile()\fP. For details, see the
-.\" HREF
-\fBpcre2jit\fP
-.\"
-documentation.
-.P
-Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the check for
-the pattern; it does not also apply to subject strings. If you want to disable
-the check for a subject string you must pass this same option to
-\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP.
+are (by default) checked for validity on entry to the relevant functions.
+If an invalid UTF string is passed, a negative error code is returned. The
+code unit offset to the offending character can be extracted from the match
+data block by calling \fBpcre2_get_startchar()\fP, which is used for this
+purpose after a UTF error.
.P
UTF-16 and UTF-32 strings can indicate their endianness by special code known
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
strings to be in host byte order.
.P
-Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other
-processing takes place. In the case of \fBpcre2_match()\fP and
-\fBpcre2_dfa_match()\fP calls with a non-zero starting offset, the check is
-applied only to that part of the subject that could be inspected during
-matching, and there is a check that the starting offset points to the first
-code unit of a character or to the end of the subject. If there are no
-lookbehind assertions in the pattern, the check starts at the starting offset.
-Otherwise, it starts at the length of the longest lookbehind before the
+A UTF string is checked before any other processing takes place. In the case of
+\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP calls with a non-zero starting
+offset, the check is applied only to that part of the subject that could be
+inspected during matching, and there is a check that the starting offset points
+to the first code unit of a character or to the end of the subject. If there
+are no lookbehind assertions in the pattern, the check starts at the starting
+offset. Otherwise, it starts at the length of the longest lookbehind before the
starting offset, or at the start of the subject if there are not that many
characters before the starting offset. Note that the sequences \eb and \eB are
one-character lookbehinds.
@@ -286,12 +155,28 @@ independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
UTF-32.)
.P
-Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is
-given if an escape sequence for an invalid Unicode code point is encountered in
-the pattern. If you want to allow escape sequences such as \ex{d800} (a
-surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
-option. However, this is possible only in UTF-8 and UTF-32 modes, because these
-values are not representable in UTF-16.
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance, for
+example in the case of a long subject string that is being scanned repeatedly.
+If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time,
+PCRE2 assumes that the pattern or subject it is given (respectively) contains
+only valid UTF code unit sequences.
+.P
+Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the check for
+the pattern; it does not also apply to subject strings. If you want to disable
+the check for a subject string you must pass this option to \fBpcre2_match()\fP
+or \fBpcre2_dfa_match()\fP.
+.P
+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
+is undefined and your program may crash or loop indefinitely.
+.P
+Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
+that is given if an escape sequence for an invalid Unicode code point is
+encountered in the pattern. If you want to allow escape sequences such as
+\ex{d800} (a surrogate code point) you can set the
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
+only in UTF-8 and UTF-32 modes, because these values are not representable in
+UTF-16.
.
.
.\" HTML <a name="utf8strings"></a>
@@ -400,6 +285,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 06 March 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 02 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/dist2/install-sh b/dist2/install-sh
index 8175c640..0360b79e 100755
--- a/dist2/install-sh
+++ b/dist2/install-sh
@@ -1,7 +1,7 @@
#!/bin/sh
# install - install a program, script, or datafile
-scriptversion=2018-03-11.20; # UTC
+scriptversion=2016-01-11.22; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
@@ -271,18 +271,15 @@ do
fi
dst=$dst_arg
- # If destination is a directory, append the input filename.
+ # If destination is a directory, append the input filename; won't work
+ # if double slashes aren't ignored.
if test -d "$dst"; then
if test "$is_target_a_directory" = never; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
- dstbase=`basename "$src"`
- case $dst in
- */) dst=$dst$dstbase;;
- *) dst=$dst/$dstbase;;
- esac
+ dst=$dstdir/`basename "$src"`
dstdir_status=0
else
dstdir=`dirname "$dst"`
@@ -291,11 +288,6 @@ do
fi
fi
- case $dstdir in
- */) dstdirslash=$dstdir;;
- *) dstdirslash=$dstdir/;;
- esac
-
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
@@ -332,43 +324,34 @@ do
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
- # Note that $RANDOM variable is not portable (e.g. dash); Use it
- # here however when possible just to lower collision chance.
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
- trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0
-
- # Because "mkdir -p" follows existing symlinks and we likely work
- # directly in world-writeable /tmp, make sure that the '$tmpdir'
- # directory is successfully created first before we actually test
- # 'mkdir -p' feature.
if (umask $mkdir_umask &&
- $mkdirprog $mkdir_mode "$tmpdir" &&
- exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1
+ exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
- test_tmpdir="$tmpdir/a"
- ls_ld_tmpdir=`ls -ld "$test_tmpdir"`
+ ls_ld_tmpdir=`ls -ld "$tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
- $mkdirprog -m$different_mode -p -- "$test_tmpdir" && {
- ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"`
+ $mkdirprog -m$different_mode -p -- "$tmpdir" && {
+ ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
- rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir"
+ rmdir "$tmpdir/d" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
- rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null
+ rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
fi
trap '' 0;;
esac;;
@@ -444,8 +427,8 @@ do
else
# Make a couple of temp file names in the proper directory.
- dsttmp=${dstdirslash}_inst.$$_
- rmtmp=${dstdirslash}_rm.$$_
+ dsttmp=$dstdir/_inst.$$_
+ rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
@@ -510,7 +493,7 @@ do
done
# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
diff --git a/dist2/ltmain.sh b/dist2/ltmain.sh
index d3ab94d6..857338fe 100644
--- a/dist2/ltmain.sh
+++ b/dist2/ltmain.sh
@@ -1,12 +1,12 @@
#! /bin/sh
## DO NOT EDIT - This file generated from ./build-aux/ltmain.in
-## by inline-source v2018-07-24.06
+## by inline-source v2016-02-21.11
-# libtool (GNU libtool) 2.4.6.42-b88ce
+# libtool (GNU libtool) 2.4.6.40-6ca5-dirty
# Provide generalized library-building support services.
# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
# This is free software; see the source for copying conditions. There is NO
# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -31,8 +31,8 @@
PROGRAM=libtool
PACKAGE=libtool
-VERSION=2.4.6.42-b88ce
-package_revision=2.4.6.42
+VERSION=2.4.6.40-6ca5-dirty
+package_revision=2.4.6.40
## ------ ##
@@ -64,7 +64,7 @@ package_revision=2.4.6.42
# libraries, which are installed to $pkgauxdir.
# Set a version string for this script.
-scriptversion=2018-07-24.06; # UTC
+scriptversion=2017-04-19.12; # UTC
# General shell script boiler plate, and helper functions.
# Written by Gary V. Vaughan, 2004
@@ -72,7 +72,7 @@ scriptversion=2018-07-24.06; # UTC
# This is free software. There is NO warranty; not even for
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
-# Copyright (C) 2004-2018 Bootstrap Authors
+# Copyright (C) 2004-2017 Bootstrap Authors
#
# This file is dual licensed under the terms of the MIT license
# <https://opensource.org/license/MIT>, and GPL version 3 or later
@@ -1497,7 +1497,7 @@ func_lt_ver ()
# This is free software. There is NO warranty; not even for
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
-# Copyright (C) 2010-2018 Bootstrap Authors
+# Copyright (C) 2010-2017 Bootstrap Authors
#
# This file is dual licensed under the terms of the MIT license
# <https://opensource.org/license/MIT>, and GPL version 3 or later
@@ -1510,7 +1510,7 @@ func_lt_ver ()
# <https://github.com/gnulib-modules/bootstrap/issues>
# Set a version string for this script.
-scriptversion=2018-07-24.06; # UTC
+scriptversion=2016-03-06.01; # UTC
## ------ ##
@@ -2171,12 +2171,12 @@ func_version ()
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
-# time-stamp-pattern: "30/scriptversion=%:y-%02m-%02d.%02H; # UTC"
+# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC"
# time-stamp-time-zone: "UTC"
# End:
# Set a version string.
-scriptversion='(GNU libtool) 2.4.6.42-b88ce'
+scriptversion='(GNU libtool) 2.4.6.40-6ca5-dirty'
# func_echo ARG...
@@ -2267,7 +2267,7 @@ include the following information:
compiler: $LTCC
compiler flags: $LTCFLAGS
linker: $LD (gnu? $with_gnu_ld)
- version: $progname (GNU libtool) 2.4.6.42-b88ce
+ version: $progname (GNU libtool) 2.4.6.40-6ca5-dirty
automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q`
autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q`
diff --git a/dist2/m4/libtool.m4 b/dist2/m4/libtool.m4
index b55a6e57..597c6042 100644
--- a/dist2/m4/libtool.m4
+++ b/dist2/m4/libtool.m4
@@ -1,6 +1,6 @@
# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
#
-# Copyright (C) 1996-2001, 2003-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2001, 2003-2017 Free Software Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is free software; the Free Software Foundation gives
@@ -219,8 +219,8 @@ esac
ofile=libtool
can_build_shared=yes
-# All known linkers require a '.a' archive for static linking (except MSVC and
-# ICC, which need '.lib').
+# All known linkers require a '.a' archive for static linking (except MSVC,
+# which needs '.lib').
libext=a
with_gnu_ld=$lt_cv_prog_gnu_ld
@@ -2587,8 +2587,8 @@ m4_if([$1], [],[
dynamic_linker='Win32 ld.exe'
;;
- *,cl* | *,icl*)
- # Native MSVC or ICC
+ *,cl*)
+ # Native MSVC
libname_spec='$name'
soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext'
library_names_spec='$libname.dll.lib'
@@ -2644,7 +2644,7 @@ m4_if([$1], [],[
;;
*)
- # Assume MSVC and ICC wrapper
+ # Assume MSVC wrapper
library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib'
dynamic_linker='Win32 ld.exe'
;;
@@ -4032,7 +4032,7 @@ for ac_symprfx in "" "_"; do
if test "$lt_cv_nm_interface" = "MS dumpbin"; then
# Fake it for dumpbin and say T for any non-static function,
# D for any global variable and I for any imported variable.
- # Also find C++ and __fastcall symbols from MSVC++ or ICC,
+ # Also find C++ and __fastcall symbols from MSVC++,
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK ['"\
" {last_section=section; section=\$ 3};"\
@@ -4949,7 +4949,7 @@ m4_if([$1], [CXX], [
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl* | icl*)
+ cl*)
_LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
;;
*)
@@ -5006,15 +5006,15 @@ dnl Note also adjust exclude_expsyms for C++ above.
case $host_os in
cygwin* | mingw* | pw32* | cegcc*)
- # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time
+ # FIXME: the MSVC++ port hasn't been tested in a loooong time
# When not using gcc, we currently assume that we are using
- # Microsoft Visual C++ or Intel C++ Compiler.
+ # Microsoft Visual C++.
if test yes != "$GCC"; then
with_gnu_ld=no
fi
;;
interix*)
- # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC)
+ # we just hope/assume this is gcc and not c89 (= MSVC++)
with_gnu_ld=yes
;;
openbsd* | bitrig*)
@@ -5579,12 +5579,12 @@ _LT_EOF
cygwin* | mingw* | pw32* | cegcc*)
# When not using gcc, we currently assume that we are using
- # Microsoft Visual C++ or Intel C++ Compiler.
+ # Microsoft Visual C++.
# hardcode_libdir_flag_spec is actually meaningless, as there is
# no search path for DLLs.
case $cc_basename in
- cl* | icl*)
- # Native MSVC or ICC
+ cl*)
+ # Native MSVC
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
_LT_TAGVAR(always_export_symbols, $1)=yes
@@ -5625,7 +5625,7 @@ _LT_EOF
fi'
;;
*)
- # Assume MSVC and ICC wrapper
+ # Assume MSVC wrapper
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
# Tell ltmain to make .lib files, not .a files.
@@ -6655,8 +6655,8 @@ if test yes != "$_lt_caught_CXX_error"; then
cygwin* | mingw* | pw32* | cegcc*)
case $GXX,$cc_basename in
- ,cl* | no,cl* | ,icl* | no,icl*)
- # Native MSVC or ICC
+ ,cl* | no,cl*)
+ # Native MSVC
# hardcode_libdir_flag_spec is actually meaningless, as there is
# no search path for DLLs.
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
diff --git a/dist2/m4/ltoptions.m4 b/dist2/m4/ltoptions.m4
index 07421d92..621bd18b 100644
--- a/dist2/m4/ltoptions.m4
+++ b/dist2/m4/ltoptions.m4
@@ -1,6 +1,6 @@
# Helper functions for option handling. -*- Autoconf -*-
#
-# Copyright (C) 2004-2005, 2007-2009, 2011-2018 Free Software
+# Copyright (C) 2004-2005, 2007-2009, 2011-2017 Free Software
# Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
diff --git a/dist2/m4/ltsugar.m4 b/dist2/m4/ltsugar.m4
index 3985c568..ab69a6b9 100644
--- a/dist2/m4/ltsugar.m4
+++ b/dist2/m4/ltsugar.m4
@@ -1,6 +1,6 @@
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
-# Copyright (C) 2004-2005, 2007-2008, 2011-2018 Free Software
+# Copyright (C) 2004-2005, 2007-2008, 2011-2017 Free Software
# Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
diff --git a/dist2/m4/ltversion.m4 b/dist2/m4/ltversion.m4
index 86b2ad72..8250ea47 100644
--- a/dist2/m4/ltversion.m4
+++ b/dist2/m4/ltversion.m4
@@ -1,6 +1,6 @@
# ltversion.m4 -- version numbers -*- Autoconf -*-
#
-# Copyright (C) 2004, 2011-2018 Free Software Foundation, Inc.
+# Copyright (C) 2004, 2011-2017 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004
#
# This file is free software; the Free Software Foundation gives
@@ -9,15 +9,15 @@
# @configure_input@
-# serial 4221 ltversion.m4
+# serial 4219 ltversion.m4
# This file is part of GNU Libtool
-m4_define([LT_PACKAGE_VERSION], [2.4.6.42-b88ce])
-m4_define([LT_PACKAGE_REVISION], [2.4.6.42])
+m4_define([LT_PACKAGE_VERSION], [2.4.6.40-6ca5-dirty])
+m4_define([LT_PACKAGE_REVISION], [2.4.6.40])
AC_DEFUN([LTVERSION_VERSION],
-[macro_version='2.4.6.42-b88ce'
-macro_revision='2.4.6.42'
+[macro_version='2.4.6.40-6ca5-dirty'
+macro_revision='2.4.6.40'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])
diff --git a/dist2/m4/lt~obsolete.m4 b/dist2/m4/lt~obsolete.m4
index 54ea1c42..9919d4de 100644
--- a/dist2/m4/lt~obsolete.m4
+++ b/dist2/m4/lt~obsolete.m4
@@ -1,6 +1,6 @@
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
#
-# Copyright (C) 2004-2005, 2007, 2009, 2011-2018 Free Software
+# Copyright (C) 2004-2005, 2007, 2009, 2011-2017 Free Software
# Foundation, Inc.
# Written by Scott James Remnant, 2004.
#
diff --git a/dist2/missing b/dist2/missing
index 625aeb11..c6e37958 100755
--- a/dist2/missing
+++ b/dist2/missing
@@ -1,9 +1,9 @@
#! /bin/sh
# Common wrapper for a few potentially missing GNU programs.
-scriptversion=2018-03-07.03; # UTC
+scriptversion=2016-01-11.22; # UTC
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
@@ -17,7 +17,7 @@ scriptversion=2018-03-07.03; # UTC
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -101,9 +101,9 @@ else
exit $st
fi
-perl_URL=https://www.perl.org/
-flex_URL=https://github.com/westes/flex
-gnu_software_URL=https://www.gnu.org/software
+perl_URL=http://www.perl.org/
+flex_URL=http://flex.sourceforge.net/
+gnu_software_URL=http://www.gnu.org/software
program_details ()
{
@@ -207,7 +207,7 @@ give_advice "$1" | sed -e '1s/^/WARNING: /' \
exit $st
# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
diff --git a/dist2/perltest.sh b/dist2/perltest.sh
index 406a14e0..5e6c466b 100755
--- a/dist2/perltest.sh
+++ b/dist2/perltest.sh
@@ -1,10 +1,8 @@
#! /bin/sh
# Script for testing regular expressions with perl to check that PCRE2 handles
-# them the same. For testing with different versions of Perl, if the first
-# argument is -perl then the second is taken as the Perl command to use, and
-# both are then removed. If the next argument is "-w", Perl is called with
-# "-w", which turns on its warning mode.
+# them the same. If the first argument to this script is "-w", Perl is also
+# called with "-w", which turns on its warning mode.
#
# The Perl code has to have "use utf8" and "require Encode" at the start when
# running UTF-8 tests, but *not* for non-utf8 tests. (The "require" would
@@ -12,8 +10,8 @@
# the script will always run for these tests.)
#
# The desired effect is achieved by making this a shell script that passes the
-# Perl script to Perl through a pipe. If the next argument is "-utf8", a
-# suitable prefix is set up.
+# Perl script to Perl through a pipe. If the first argument (possibly after
+# removing "-w") is "-utf8", a suitable prefix is set up.
#
# The remaining arguments, if any, are passed to Perl. They are an input file
# and an output file. If there is one argument, the output is written to
@@ -25,12 +23,6 @@ perl=perl
perlarg=''
prefix=''
-if [ $# -gt 1 -a "$1" = "-perl" ] ; then
- shift
- perl=$1
- shift
-fi
-
if [ $# -gt 0 -a "$1" = "-w" ] ; then
perlarg="-w"
shift
@@ -83,11 +75,6 @@ fi
(echo "$prefix" ; cat <<'PERLEND'
-# The alpha assertions currently give warnings even when -w is not specified.
-
-no warnings "experimental::alpha_assertions";
-no warnings "experimental::script_run";
-
# Function for turning a string into a string of printing chars.
sub pchars {
@@ -142,9 +129,6 @@ else { $outfile = "STDOUT"; }
printf($outfile "Perl $] Regular Expressions\n\n");
-$extra_modifiers = "";
-$default_show_mark = 0;
-
# Main loop
NEXT_RE:
@@ -386,10 +370,7 @@ for (;;)
}
}
-# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode
-# "main::OUTFILE" used only once".
-
-close(OUTFILE) if $outfile eq "OUTFILE";
+# printf $outfile "\n";
PERLEND
) | $perl $perlarg - $@
diff --git a/dist2/src/config.h.generic b/dist2/src/config.h.generic
index 25d45eeb..89a52ef8 100644
--- a/dist2/src/config.h.generic
+++ b/dist2/src/config.h.generic
@@ -35,10 +35,6 @@ sure both macros are undefined; an emulation function will then be used. */
*/
/* #undef BSR_ANYCRLF */
-/* Define to any value to disable the use of the z and t modifiers in
- formatting settings such as %zu or %td (this is rarely needed). */
-/* #undef DISABLE_PERCENT_ZT */
-
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
@@ -218,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.33"
+#define PACKAGE_STRING "PCRE2 10.32"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@@ -227,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "10.33"
+#define PACKAGE_VERSION "10.32"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@@ -303,11 +299,6 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable callout script support in pcre2grep. */
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
-/* Define to any value to enable fork support in pcre2grep callout scripts.
- This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
- */
-/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
-
/* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */
/* #undef SUPPORT_PCRE2GREP_JIT */
@@ -352,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */
#endif
/* Version number of package */
-#define VERSION "10.33"
+#define VERSION "10.32"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
diff --git a/dist2/src/config.h.in b/dist2/src/config.h.in
index 6b8eb7e9..d8a5280f 100644
--- a/dist2/src/config.h.in
+++ b/dist2/src/config.h.in
@@ -35,10 +35,6 @@ sure both macros are undefined; an emulation function will then be used. */
*/
#undef BSR_ANYCRLF
-/* Define to any value to disable the use of the z and t modifiers in
- formatting settings such as %zu or %td (this is rarely needed). */
-#undef DISABLE_PERCENT_ZT
-
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
@@ -290,11 +286,6 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable callout script support in pcre2grep. */
#undef SUPPORT_PCRE2GREP_CALLOUT
-/* Define to any value to enable fork support in pcre2grep callout scripts.
- This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
- */
-#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
-
/* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */
#undef SUPPORT_PCRE2GREP_JIT
diff --git a/dist2/src/dftables.c b/dist2/src/dftables.c
index 02796cc6..c0af3625 100644
--- a/dist2/src/dftables.c
+++ b/dist2/src/dftables.c
@@ -183,10 +183,10 @@ fprintf(f,
"/* This table identifies various classes of character by individual bits:\n"
" 0x%02x white space character\n"
" 0x%02x letter\n"
- " 0x%02x lower case letter\n"
" 0x%02x decimal digit\n"
+ " 0x%02x hexadecimal digit\n"
" 0x%02x alphanumeric or '_'\n*/\n\n",
- ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
+ ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word);
fprintf(f, " ");
for (i = 0; i < 256; i++)
diff --git a/dist2/src/pcre2.h.generic b/dist2/src/pcre2.h.generic
index 102b5d91..3d2feb7a 100644
--- a/dist2/src/pcre2.h.generic
+++ b/dist2/src/pcre2.h.generic
@@ -42,9 +42,15 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
-#define PCRE2_MINOR 33
+#define PCRE2_MINOR 32
#define PCRE2_PRERELEASE
-#define PCRE2_DATE 2019-04-16
+#define PCRE2_DATE 2018-09-10
+
+/* For the benefit of systems without stdint.h, an alternative is to use
+inttypes.h. The existence of these headers is checked by configure or CMake. */
+
+#define PCRE2_HAVE_STDINT_H 1
+#define PCRE2_HAVE_INTTYPES_H 1
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@@ -81,15 +87,18 @@ set, we ensure here that it has no effect. */
#define PCRE2_CALL_CONVENTION
#endif
-/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
-uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
-not have stdint.h, which is why we use inttypes.h, which according to the C
-standard is a superset of stdint.h. If none of these headers are available,
-the relevant values must be provided by some other means. */
+/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
+that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
+header, the relevant values must be provided by some other means. */
#include <limits.h>
#include <stdlib.h>
+
+#if PCRE2_HAVE_STDINT_H
+#include <stdint.h>
+#elif PCRE2_HAVE_INTTYPES_H
#include <inttypes.h>
+#endif
/* Allow for C++ users compiling this directly. */
@@ -149,37 +158,43 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
-#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
-#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
/* These are for pcre2_jit_compile(). */
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
-#define PCRE2_JIT_INVALID_UTF 0x00000100u
-
-/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
-pcre2_substitute(). Some are allowed only for one of the functions, and in
-these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
-PCRE2_NO_UTF_CHECK can also be passed to these functions (though
-pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
-
-#define PCRE2_NOTBOL 0x00000001u
-#define PCRE2_NOTEOL 0x00000002u
-#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
-#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
-#define PCRE2_PARTIAL_SOFT 0x00000010u
-#define PCRE2_PARTIAL_HARD 0x00000020u
-#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
-#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
-#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
-#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
-#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
+
+/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
+that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
+functions (though pcre2_jit_match() ignores the latter since it bypasses all
+sanity checks). */
+
+#define PCRE2_NOTBOL 0x00000001u
+#define PCRE2_NOTEOL 0x00000002u
+#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
+#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
+#define PCRE2_PARTIAL_SOFT 0x00000010u
+#define PCRE2_PARTIAL_HARD 0x00000020u
+
+/* These are additional options for pcre2_dfa_match(). */
+
+#define PCRE2_DFA_RESTART 0x00000040u
+#define PCRE2_DFA_SHORTEST 0x00000080u
+
+/* These are additional options for pcre2_substitute(), which passes any others
+through to pcre2_match(). */
+
+#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
+#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
+#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
+#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
+#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
+
+/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
+ignored for pcre2_jit_match(). */
+
+#define PCRE2_NO_JIT 0x00002000u
/* Options for pcre2_pattern_convert(). */
@@ -303,8 +318,6 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
-#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
-#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
/* "Expected" matching error codes: no match and partial match. */
@@ -491,10 +504,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
-/* The structures for passing out data via callout functions. We use structures
-so that new fields can be added on the end in future versions, without changing
-the API of the function, thereby allowing old clients to work without
-modification. Define the generic versions in a macro; the width-specific
+/* The structure for passing out data via a callout function. We use a
+structure so that new fields can be added on the end in future versions,
+without changing the API of the function, thereby allowing old clients to work
+without modification. Define the generic version in a macro; the width-specific
versions are generated from this macro below. */
/* Flags for the callout_flags field. These are cleared after a callout. */
@@ -536,19 +549,7 @@ typedef struct pcre2_callout_enumerate_block { \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
-} pcre2_callout_enumerate_block; \
-\
-typedef struct pcre2_substitute_callout_block { \
- uint32_t version; /* Identifies version of block */ \
- /* ------------------------ Version 0 ------------------------------- */ \
- PCRE2_SPTR input; /* Pointer to input subject string */ \
- PCRE2_SPTR output; /* Pointer to output buffer */ \
- PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
- PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
- uint32_t oveccount; /* Count of pairs set in ovector */ \
- uint32_t subscount; /* Substitution number */ \
- /* ------------------------------------------------------------------ */ \
-} pcre2_substitute_callout_block;
+} pcre2_callout_enumerate_block;
/* List the generic forms of all other functions in macros, which will be
@@ -604,9 +605,6 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
- pcre2_set_substitute_callout(pcre2_match_context *, \
- int (*)(pcre2_substitute_callout_block *, void *), void *); \
-PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
@@ -809,7 +807,6 @@ pcre2_compile are called by application code. */
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
-#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
@@ -875,7 +872,6 @@ pcre2_compile are called by application code. */
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
-#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
diff --git a/dist2/src/pcre2.h.in b/dist2/src/pcre2.h.in
index 9415d702..a9396e02 100644
--- a/dist2/src/pcre2.h.in
+++ b/dist2/src/pcre2.h.in
@@ -46,6 +46,12 @@ POSSIBILITY OF SUCH DAMAGE.
#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
#define PCRE2_DATE @PCRE2_DATE@
+/* For the benefit of systems without stdint.h, an alternative is to use
+inttypes.h. The existence of these headers is checked by configure or CMake. */
+
+#define PCRE2_HAVE_STDINT_H @PCRE2_HAVE_STDINT_H@
+#define PCRE2_HAVE_INTTYPES_H @PCRE2_HAVE_INTTYPES_H@
+
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export setting is defined in pcre2_internal.h, which includes this file. So we
@@ -81,15 +87,18 @@ set, we ensure here that it has no effect. */
#define PCRE2_CALL_CONVENTION
#endif
-/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
-uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
-not have stdint.h, which is why we use inttypes.h, which according to the C
-standard is a superset of stdint.h. If none of these headers are available,
-the relevant values must be provided by some other means. */
+/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
+that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
+header, the relevant values must be provided by some other means. */
#include <limits.h>
#include <stdlib.h>
+
+#if PCRE2_HAVE_STDINT_H
+#include <stdint.h>
+#elif PCRE2_HAVE_INTTYPES_H
#include <inttypes.h>
+#endif
/* Allow for C++ users compiling this directly. */
@@ -149,37 +158,43 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
-#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
-#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
/* These are for pcre2_jit_compile(). */
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
-#define PCRE2_JIT_INVALID_UTF 0x00000100u
-
-/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
-pcre2_substitute(). Some are allowed only for one of the functions, and in
-these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
-PCRE2_NO_UTF_CHECK can also be passed to these functions (though
-pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
-
-#define PCRE2_NOTBOL 0x00000001u
-#define PCRE2_NOTEOL 0x00000002u
-#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
-#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
-#define PCRE2_PARTIAL_SOFT 0x00000010u
-#define PCRE2_PARTIAL_HARD 0x00000020u
-#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
-#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
-#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
-#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
-#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
-#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
+
+/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
+that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
+functions (though pcre2_jit_match() ignores the latter since it bypasses all
+sanity checks). */
+
+#define PCRE2_NOTBOL 0x00000001u
+#define PCRE2_NOTEOL 0x00000002u
+#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
+#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
+#define PCRE2_PARTIAL_SOFT 0x00000010u
+#define PCRE2_PARTIAL_HARD 0x00000020u
+
+/* These are additional options for pcre2_dfa_match(). */
+
+#define PCRE2_DFA_RESTART 0x00000040u
+#define PCRE2_DFA_SHORTEST 0x00000080u
+
+/* These are additional options for pcre2_substitute(), which passes any others
+through to pcre2_match(). */
+
+#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
+#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
+#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
+#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
+#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
+
+/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
+ignored for pcre2_jit_match(). */
+
+#define PCRE2_NO_JIT 0x00002000u
/* Options for pcre2_pattern_convert(). */
@@ -303,8 +318,6 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
-#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
-#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
/* "Expected" matching error codes: no match and partial match. */
@@ -491,10 +504,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
-/* The structures for passing out data via callout functions. We use structures
-so that new fields can be added on the end in future versions, without changing
-the API of the function, thereby allowing old clients to work without
-modification. Define the generic versions in a macro; the width-specific
+/* The structure for passing out data via a callout function. We use a
+structure so that new fields can be added on the end in future versions,
+without changing the API of the function, thereby allowing old clients to work
+without modification. Define the generic version in a macro; the width-specific
versions are generated from this macro below. */
/* Flags for the callout_flags field. These are cleared after a callout. */
@@ -536,19 +549,7 @@ typedef struct pcre2_callout_enumerate_block { \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
-} pcre2_callout_enumerate_block; \
-\
-typedef struct pcre2_substitute_callout_block { \
- uint32_t version; /* Identifies version of block */ \
- /* ------------------------ Version 0 ------------------------------- */ \
- PCRE2_SPTR input; /* Pointer to input subject string */ \
- PCRE2_SPTR output; /* Pointer to output buffer */ \
- PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
- PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
- uint32_t oveccount; /* Count of pairs set in ovector */ \
- uint32_t subscount; /* Substitution number */ \
- /* ------------------------------------------------------------------ */ \
-} pcre2_substitute_callout_block;
+} pcre2_callout_enumerate_block;
/* List the generic forms of all other functions in macros, which will be
@@ -604,9 +605,6 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
- pcre2_set_substitute_callout(pcre2_match_context *, \
- int (*)(pcre2_substitute_callout_block *, void *), void *); \
-PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
@@ -809,7 +807,6 @@ pcre2_compile are called by application code. */
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
-#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
@@ -875,7 +872,6 @@ pcre2_compile are called by application code. */
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
-#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
diff --git a/dist2/src/pcre2_auto_possess.c b/dist2/src/pcre2_auto_possess.c
index 6d7b7c4a..2ce152e9 100644
--- a/dist2/src/pcre2_auto_possess.c
+++ b/dist2/src/pcre2_auto_possess.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -605,15 +605,6 @@ for(;;)
if (cb->had_recurse) return FALSE;
break;
- /* A script run might have to backtrack if the iterated item can match
- characters from more than one script. So give up unless repeating an
- explicit character. */
-
- case OP_SCRIPT_RUN:
- if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
- return FALSE;
- break;
-
/* Atomic sub-patterns and assertions can always auto-possessify their
last iterator. However, if the group was entered as a result of checking
a previous iterator, this is not possible. */
@@ -623,6 +614,7 @@ for(;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
+
return !entered_a_group;
}
@@ -1051,7 +1043,7 @@ for(;;)
if (chr > 255) break;
class_bitset = (uint8_t *)
((list_ptr == list ? code : base_end) - list_ptr[2]);
- if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
+ if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
break;
#ifdef SUPPORT_WIDE_CHARS
diff --git a/dist2/src/pcre2_chartables.c b/dist2/src/pcre2_chartables.c
index 0e07edb4..203cb1a4 100644
--- a/dist2/src/pcre2_chartables.c
+++ b/dist2/src/pcre2_chartables.c
@@ -2,24 +2,23 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* This file was automatically written by the dftables auxiliary
-program. It contains character tables that are used when no external
-tables are passed to PCRE2 by the application that calls it. The tables
-are used only for characters whose code values are less than 256. */
-
-/*The dftables program (which is distributed with PCRE2) can be used to
-build alternative versions of this file. This is necessary if you are
-running in an EBCDIC environment, or if you want to default to a different
-encoding, for example ISO-8859-1. When dftables is run, it creates these
-tables in the current locale. This happens automatically if PCRE2 is
-configured with --enable-rebuild-chartables. */
-
-/* The following #include is present because without it gcc 4.x may remove
-the array definition from the final binary if PCRE2 is built into a static
-library and dead code stripping is activated. This leads to link errors.
-Pulling in the header ensures that the array gets flagged as "someone
-outside this compilation unit might reference this" and so it will always
-be supplied to the linker. */
+/* This file contains character tables that are used when no external tables
+are passed to PCRE2 by the application that calls it. The tables are used only
+for characters whose code values are less than 256.
+
+This is a default version of the tables that assumes ASCII encoding. A program
+called dftables (which is distributed with PCRE2) can be used to build
+alternative versions of this file. This is necessary if you are running in an
+EBCDIC environment, or if you want to default to a different encoding, for
+example ISO-8859-1. When dftables is run, it creates these tables in the
+current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
+happens automatically.
+
+The following #includes are present because without them gcc 4.x may remove the
+array definition from the final binary if PCRE2 is built into a static library
+and dead code stripping is activated. This leads to link errors. Pulling in the
+header ensures that the array gets flagged as "someone outside this compilation
+unit might reference this" and so it will always be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -102,7 +101,7 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph print, punct, and cntrl. Other classes are built from combinations. */
+graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@@ -157,27 +156,28 @@ graph print, punct, and cntrl. Other classes are built from combinations. */
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
- 0x04 lower case letter
- 0x08 decimal digit
+ 0x04 decimal digit
+ 0x08 hexadecimal digit
0x10 alphanumeric or '_'
+ 0x80 regular expression metacharacter or binary zero
*/
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
+ 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
- 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
- 0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
+ 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
+ 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
+ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
+ 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
- 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
- 0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
- 0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
- 0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
- 0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
+ 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
+ 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
diff --git a/dist2/src/pcre2_chartables.c.dist b/dist2/src/pcre2_chartables.c.dist
index 0e07edb4..4046500c 100644
--- a/dist2/src/pcre2_chartables.c.dist
+++ b/dist2/src/pcre2_chartables.c.dist
@@ -157,8 +157,8 @@ graph print, punct, and cntrl. Other classes are built from combinations. */
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
- 0x04 lower case letter
- 0x08 decimal digit
+ 0x04 decimal digit
+ 0x08 hexadecimal digit
0x10 alphanumeric or '_'
*/
@@ -168,16 +168,16 @@ graph print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
- 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
- 0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
+ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
+ 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
- 0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
- 0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
- 0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
- 0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
+ 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
diff --git a/dist2/src/pcre2_compile.c b/dist2/src/pcre2_compile.c
index 068735ae..6bb1de36 100644
--- a/dist2/src/pcre2_compile.c
+++ b/dist2/src/pcre2_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -240,57 +240,49 @@ code (meta_extra_lengths, just below) must be updated to remain in step. */
#define META_RANGE_LITERAL 0x801f0000u /* range defined literally */
#define META_RECURSE 0x80200000u /* Recursion */
#define META_RECURSE_BYNAME 0x80210000u /* (?&name) */
-#define META_SCRIPT_RUN 0x80220000u /* (*script_run:...) */
/* These must be kept together to make it easy to check that an assertion
is present where expected in a conditional group. */
-#define META_LOOKAHEAD 0x80230000u /* (?= */
-#define META_LOOKAHEADNOT 0x80240000u /* (?! */
-#define META_LOOKBEHIND 0x80250000u /* (?<= */
-#define META_LOOKBEHINDNOT 0x80260000u /* (?<! */
+#define META_LOOKAHEAD 0x80220000u /* (?= */
+#define META_LOOKAHEADNOT 0x80230000u /* (?! */
+#define META_LOOKBEHIND 0x80240000u /* (?<= */
+#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
/* These must be kept in this order, with consecutive values, and the _ARG
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. */
-#define META_MARK 0x80270000u /* (*MARK) */
-#define META_ACCEPT 0x80280000u /* (*ACCEPT) */
-#define META_FAIL 0x80290000u /* (*FAIL) */
-#define META_COMMIT 0x802a0000u /* These */
-#define META_COMMIT_ARG 0x802b0000u /* pairs */
-#define META_PRUNE 0x802c0000u /* must */
-#define META_PRUNE_ARG 0x802d0000u /* be */
-#define META_SKIP 0x802e0000u /* kept */
-#define META_SKIP_ARG 0x802f0000u /* in */
-#define META_THEN 0x80300000u /* this */
-#define META_THEN_ARG 0x80310000u /* order */
+#define META_MARK 0x80260000u /* (*MARK) */
+#define META_ACCEPT 0x80270000u /* (*ACCEPT) */
+#define META_FAIL 0x80280000u /* (*FAIL) */
+#define META_COMMIT 0x80290000u /* These */
+#define META_COMMIT_ARG 0x802a0000u /* pairs */
+#define META_PRUNE 0x802b0000u /* must */
+#define META_PRUNE_ARG 0x802c0000u /* be */
+#define META_SKIP 0x802d0000u /* kept */
+#define META_SKIP_ARG 0x802e0000u /* in */
+#define META_THEN 0x802f0000u /* this */
+#define META_THEN_ARG 0x80300000u /* order */
/* These must be kept in groups of adjacent 3 values, and all together. */
-#define META_ASTERISK 0x80320000u /* * */
-#define META_ASTERISK_PLUS 0x80330000u /* *+ */
-#define META_ASTERISK_QUERY 0x80340000u /* *? */
-#define META_PLUS 0x80350000u /* + */
-#define META_PLUS_PLUS 0x80360000u /* ++ */
-#define META_PLUS_QUERY 0x80370000u /* +? */
-#define META_QUERY 0x80380000u /* ? */
-#define META_QUERY_PLUS 0x80390000u /* ?+ */
-#define META_QUERY_QUERY 0x803a0000u /* ?? */
-#define META_MINMAX 0x803b0000u /* {n,m} repeat */
-#define META_MINMAX_PLUS 0x803c0000u /* {n,m}+ repeat */
-#define META_MINMAX_QUERY 0x803d0000u /* {n,m}? repeat */
+#define META_ASTERISK 0x80310000u /* * */
+#define META_ASTERISK_PLUS 0x80320000u /* *+ */
+#define META_ASTERISK_QUERY 0x80330000u /* *? */
+#define META_PLUS 0x80340000u /* + */
+#define META_PLUS_PLUS 0x80350000u /* ++ */
+#define META_PLUS_QUERY 0x80360000u /* +? */
+#define META_QUERY 0x80370000u /* ? */
+#define META_QUERY_PLUS 0x80380000u /* ?+ */
+#define META_QUERY_QUERY 0x80390000u /* ?? */
+#define META_MINMAX 0x803a0000u /* {n,m} repeat */
+#define META_MINMAX_PLUS 0x803b0000u /* {n,m}+ repeat */
+#define META_MINMAX_QUERY 0x803c0000u /* {n,m}? repeat */
#define META_FIRST_QUANTIFIER META_ASTERISK
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
-/* This is a special "meta code" that is used only to distinguish (*asr: from
-(*sr: in the table of aphabetic assertions. It is never stored in the parsed
-pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
-therefore no need for it to have a length entry, so use a high value. */
-
-#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
-
/* Table of extra lengths for each of the meta codes. Must be kept in step with
the definitions above. For some items these values are a basic length to which
a variable amount has to be added. */
@@ -330,7 +322,6 @@ static unsigned char meta_extra_lengths[] = {
0, /* META_RANGE_LITERAL */
SIZEOFFSET, /* META_RECURSE */
1+SIZEOFFSET, /* META_RECURSE_BYNAME */
- 0, /* META_SCRIPT_RUN */
0, /* META_LOOKAHEAD */
0, /* META_LOOKAHEADNOT */
SIZEOFFSET, /* META_LOOKBEHIND */
@@ -368,17 +359,17 @@ enum { PSKIP_ALT, PSKIP_CLASS, PSKIP_KET };
experimenting to figure out how to stop gcc 5.3.0 from warning with
-Wconversion. This version gets a warning:
- #define SETBIT(a,b) a[(b)/8] |= (uint8_t)(1u << ((b)&7))
+ #define SETBIT(a,b) a[(b)/8] |= (uint8_t)(1 << ((b)&7))
Let's hope the apparently less efficient version isn't actually so bad if the
compiler is clever with identical subexpressions. */
-#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
+#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1 << ((b)&7)))
/* Private flags added to firstcu and reqcu. */
-#define REQ_CASELESS (1u << 0) /* Indicates caselessness */
-#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */
+#define REQ_CASELESS (1 << 0) /* Indicates caselessness */
+#define REQ_VARY (1 << 1) /* reqcu followed non-literal item */
/* Negative values for the firstcu and reqcu flags */
#define REQ_UNSET (-2) /* Not yet found anything */
#define REQ_NONE (-1) /* Found not fixed char */
@@ -624,46 +615,6 @@ static const uint32_t verbops[] = {
OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
-/* Table of "alpha assertions" like (*pla:...), similar to the (*VERB) table. */
-
-typedef struct alasitem {
- unsigned int len; /* Length of name */
- uint32_t meta; /* Base META_ code */
-} alasitem;
-
-static const char alasnames[] =
- STRING_pla0
- STRING_plb0
- STRING_nla0
- STRING_nlb0
- STRING_positive_lookahead0
- STRING_positive_lookbehind0
- STRING_negative_lookahead0
- STRING_negative_lookbehind0
- STRING_atomic0
- STRING_sr0
- STRING_asr0
- STRING_script_run0
- STRING_atomic_script_run;
-
-static const alasitem alasmeta[] = {
- { 3, META_LOOKAHEAD },
- { 3, META_LOOKBEHIND },
- { 3, META_LOOKAHEADNOT },
- { 3, META_LOOKBEHINDNOT },
- { 18, META_LOOKAHEAD },
- { 19, META_LOOKBEHIND },
- { 18, META_LOOKAHEADNOT },
- { 19, META_LOOKBEHINDNOT },
- { 6, META_ATOMIC },
- { 2, META_SCRIPT_RUN }, /* sr = script run */
- { 3, META_ATOMIC_SCRIPT_RUN }, /* asr = atomic script run */
- { 10, META_SCRIPT_RUN }, /* script run */
- { 17, META_ATOMIC_SCRIPT_RUN } /* atomic script run */
-};
-
-static const int alascount = sizeof(alasmeta)/sizeof(alasitem);
-
/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
static uint32_t chartypeoffset[] = {
@@ -763,8 +714,7 @@ are allowed. */
#define PUBLIC_COMPILE_EXTRA_OPTIONS \
(PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
- PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX)
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL)
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
@@ -781,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
- ERR91, ERR92, ERR93, ERR94, ERR95, ERR96 };
+ ERR91, ERR92, ERR93, ERR94 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -1012,7 +962,6 @@ for (;;)
case META_NOCAPTURE: fprintf(stderr, "META (?:"); break;
case META_LOOKAHEAD: fprintf(stderr, "META (?="); break;
case META_LOOKAHEADNOT: fprintf(stderr, "META (?!"); break;
- case META_SCRIPT_RUN: fprintf(stderr, "META (*sr:"); break;
case META_KET: fprintf(stderr, "META )"); break;
case META_ALT: fprintf(stderr, "META | %d", meta_arg); break;
@@ -1242,7 +1191,7 @@ if (code != NULL)
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
/* Decoded tables belong to the codes after deserialization, and they must
- be freed when there are no more references to them. The *ref_count should
+ be freed when there are no more reference to them. The *ref_count should
always be > 0. */
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
@@ -1449,7 +1398,7 @@ Arguments:
errorcodeptr points to the errorcode variable (containing zero)
options the current options bits
isclass TRUE if inside a character class
- cb compile data block or NULL when called from pcre2_substitute()
+ cb compile data block
Returns: zero => a data character
positive => a special escape sequence
@@ -1459,8 +1408,7 @@ Returns: zero => a data character
int
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
- int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
- compile_block *cb)
+ int *errorcodeptr, uint32_t options, BOOL isclass, compile_block *cb)
{
BOOL utf = (options & PCRE2_UTF) != 0;
PCRE2_SPTR ptr = *ptrptr;
@@ -1481,25 +1429,14 @@ GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
/* Non-alphanumerics are literals, so we just leave the value in c. An initial
value test saves a memory lookup for code points outside the alphanumeric
-range. */
+range. Otherwise, do a table lookup. A non-zero result is something that can be
+returned immediately. Otherwise further processing is required. */
if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */
-/* Otherwise, do a table lookup. Non-zero values need little processing here. A
-positive value is a literal value for something like \n. A negative value is
-the negation of one of the ESC_ macros that is passed back for handling by the
-calling function. Some extra checking is needed for \N because only \N{U+dddd}
-is supported. If the value is zero, further processing is handled below. */
-
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
{
- if (i > 0)
- {
- c = (uint32_t)i;
- if (c == CHAR_CR && (extra_options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)
- c = CHAR_LF;
- }
- else /* Negative table entry */
+ if (i > 0) c = (uint32_t)i; else /* Positive is a data character */
{
escape = -i; /* Else return a special escape */
if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
@@ -1549,29 +1486,23 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
}
}
-/* Escapes that need further processing, including those that are unknown, have
-a zero entry in the lookup table. When called from pcre2_substitute(), only \c,
-\o, and \x are recognized (\u and \U can never appear as they are used for case
-forcing). */
+/* Escapes that need further processing, including those that are unknown.
+When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u
+when BSUX is set). */
else
{
- int s;
PCRE2_SPTR oldptr;
BOOL overflow;
- BOOL alt_bsux =
- ((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0;
+ int s;
/* Filter calls from pcre2_substitute(). */
- if (cb == NULL)
+ if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x &&
+ (c != CHAR_u || (options & PCRE2_ALT_BSUX) != 0))
{
- if (c != CHAR_c && c != CHAR_o && c != CHAR_x)
- {
- *errorcodeptr = ERR3;
- return 0;
- }
- alt_bsux = FALSE; /* Do not modify \x handling */
+ *errorcodeptr = ERR3;
+ return 0;
}
switch (c)
@@ -1585,75 +1516,40 @@ else
*errorcodeptr = ERR37;
break;
- /* \u is unrecognized when neither PCRE2_ALT_BSUX nor PCRE2_EXTRA_ALT_BSUX
- is set. Otherwise, \u must be followed by exactly four hex digits or, if
- PCRE2_EXTRA_ALT_BSUX is set, by any number of hex digits in braces.
- Otherwise it is a lowercase u letter. This gives some compatibility with
- ECMAScript (aka JavaScript). */
+ /* \u is unrecognized when PCRE2_ALT_BSUX is not set. When it is treated
+ specially, \u must be followed by four hex digits. Otherwise it is a
+ lowercase u letter. */
case CHAR_u:
- if (!alt_bsux) *errorcodeptr = ERR37; else
+ if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; else
{
uint32_t xc;
-
- if (ptr >= ptrend) break;
- if (*ptr == CHAR_LEFT_CURLY_BRACKET &&
- (extra_options & PCRE2_EXTRA_ALT_BSUX) != 0)
- {
- PCRE2_SPTR hptr = ptr + 1;
- cc = 0;
-
- while (hptr < ptrend && (xc = XDIGIT(*hptr)) != 0xff)
- {
- if ((cc & 0xf0000000) != 0) /* Test for 32-bit overflow */
- {
- *errorcodeptr = ERR77;
- ptr = hptr; /* Show where */
- break; /* *hptr != } will cause another break below */
- }
- cc = (cc << 4) | xc;
- hptr++;
- }
-
- if (hptr == ptr + 1 || /* No hex digits */
- hptr >= ptrend || /* Hit end of input */
- *hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */
- break; /* Hex escape not recognized */
-
- c = cc; /* Accept the code point */
- ptr = hptr + 1;
- }
-
- else /* Must be exactly 4 hex digits */
- {
- if (ptrend - ptr < 4) break; /* Less than 4 chars */
- if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
- if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
- cc = (cc << 4) | xc;
- if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */
- cc = (cc << 4) | xc;
- if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */
- c = (cc << 4) | xc;
- ptr += 4;
- }
-
+ if (ptrend - ptr < 4) break; /* Less than 4 chars */
+ if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
+ if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
+ cc = (cc << 4) | xc;
+ if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */
+ cc = (cc << 4) | xc;
+ if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */
+ c = (cc << 4) | xc;
+ ptr += 4;
if (utf)
{
if (c > 0x10ffffU) *errorcodeptr = ERR77;
else
if (c >= 0xd800 && c <= 0xdfff &&
- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
- *errorcodeptr = ERR73;
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
+ *errorcodeptr = ERR73;
}
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
}
break;
- /* \U is unrecognized unless PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set,
- in which case it is an upper case letter. */
+ /* \U is unrecognized unless PCRE2_ALT_BSUX is set, in which case it is an
+ upper case letter. */
case CHAR_U:
- if (!alt_bsux) *errorcodeptr = ERR37;
+ if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37;
break;
/* In a character class, \g is just a literal "g". Outside a character
@@ -1832,8 +1728,8 @@ else
}
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
{
- if (utf && c >= 0xd800 && c <= 0xdfff &&
- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
+ if (utf && c >= 0xd800 && c <= 0xdfff && (cb == NULL ||
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0))
{
ptr--;
*errorcodeptr = ERR73;
@@ -1847,11 +1743,11 @@ else
}
break;
- /* When PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set, \x must be followed
- by two hexadecimal digits. Otherwise it is a lowercase x letter. */
+ /* \x is complicated. When PCRE2_ALT_BSUX is set, \x must be followed by
+ two hexadecimal digits. Otherwise it is a lowercase x letter. */
case CHAR_x:
- if (alt_bsux)
+ if ((options & PCRE2_ALT_BSUX) != 0)
{
uint32_t xc;
if (ptrend - ptr < 2) break; /* Less than 2 characters */
@@ -1859,9 +1755,9 @@ else
if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
c = (cc << 4) | xc;
ptr += 2;
- }
+ } /* End PCRE2_ALT_BSUX handling */
- /* Handle \x in Perl's style. \x{ddd} is a character code which can be
+ /* Handle \x in Perl's style. \x{ddd} is a character number which can be
greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex
digits. If not, { used to be treated as a data character. However, Perl
seems to read hex digits up to the first non-such, and ignore the rest, so
@@ -1905,8 +1801,8 @@ else
}
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
{
- if (utf && c >= 0xd800 && c <= 0xdfff &&
- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
+ if (utf && c >= 0xd800 && c <= 0xdfff && (cb == NULL ||
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0))
{
ptr--;
*errorcodeptr = ERR73;
@@ -1978,9 +1874,9 @@ else
c ^= 0x40;
/* Handle \c in an EBCDIC environment. The special case \c? is converted to
- 255 (0xff) or 95 (0x5f) if other characters suggest we are using the
- POSIX-BC encoding. (This is the way Perl indicates that it handles \c?.)
- The other valid sequences correspond to a list of specific characters. */
+ 255 (0xff) or 95 (0x5f) if other characters suggest we are using the POSIX-BC
+ encoding. (This is the way Perl indicates that it handles \c?.) The other
+ valid sequences correspond to a list of specific characters. */
#else
if (c == CHAR_QUESTION_MARK)
@@ -2224,10 +2120,9 @@ return -1;
*************************************************/
/* This function is called from parse_regex() below whenever it needs to read
-the name of a subpattern or a (*VERB) or an (*alpha_assertion). The initial
-pointer must be to the character before the name. If that character is '*' we
-are reading a verb or alpha assertion name. The pointer is updated to point
-after the name, for a VERB or alpha assertion name, or after tha name's
+the name of a subpattern or a (*VERB). The initial pointer must be to the
+character before the name. If that character is '*' we are reading a verb name.
+The pointer is updated to point after the name, for a VERB, or after the name's
terminator for a subpattern name. Returning both the offset and the name
pointer is redundant information, but some callers use one and some the other,
so it is simplest just to return both.
@@ -2235,7 +2130,6 @@ so it is simplest just to return both.
Arguments:
ptrptr points to the character pointer variable
ptrend points to the end of the input string
- utf true if the input is UTF-encoded
terminator the terminator of a subpattern name must be this
offsetptr where to put the offset from the start of the pattern
nameptr where to put a pointer to the name in the input
@@ -2248,88 +2142,48 @@ Returns: TRUE if a name was read
*/
static BOOL
-read_name(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, BOOL utf, uint32_t terminator,
+read_name(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t terminator,
PCRE2_SIZE *offsetptr, PCRE2_SPTR *nameptr, uint32_t *namelenptr,
int *errorcodeptr, compile_block *cb)
{
PCRE2_SPTR ptr = *ptrptr;
-BOOL is_group = (*ptr != CHAR_ASTERISK);
+BOOL is_verb = (*ptr == CHAR_ASTERISK);
+uint32_t namelen = 0;
+uint32_t ctype = is_verb? ctype_letter : ctype_word;
-if (++ptr >= ptrend) /* No characters in name */
+if (++ptr >= ptrend)
{
- *errorcodeptr = is_group? ERR62: /* Subpattern name expected */
- ERR60; /* Verb not recognized or malformed */
+ *errorcodeptr = is_verb? ERR60: /* Verb not recognized or malformed */
+ ERR62; /* Subpattern name expected */
goto FAILED;
}
*nameptr = ptr;
*offsetptr = (PCRE2_SIZE)(ptr - cb->start_pattern);
-/* In UTF mode, a group name may contain letters and decimal digits as defined
-by Unicode properties, and underscores, but must not start with a digit. */
-
-#ifdef SUPPORT_UNICODE
-if (utf && is_group)
+if (IS_DIGIT(*ptr))
{
- uint32_t c, type;
-
- GETCHAR(c, ptr);
- type = UCD_CHARTYPE(c);
-
- if (type == ucp_Nd)
- {
- *errorcodeptr = ERR44;
- goto FAILED;
- }
-
- for(;;)
- {
- if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L &&
- c != CHAR_UNDERSCORE) break;
- ptr++;
- FORWARDCHARTEST(ptr, ptrend);
- if (ptr >= ptrend) break;
- GETCHAR(c, ptr);
- type = UCD_CHARTYPE(c);
- }
+ *errorcodeptr = ERR44; /* Group name must not start with digit */
+ goto FAILED;
}
-else
-#else
-(void)utf; /* Avoid compiler warning */
-#endif /* SUPPORT_UNICODE */
-
-/* Handle non-group names and group names in non-UTF modes. A group name must
-not start with a digit. If either of the others start with a digit it just
-won't be recognized. */
+while (ptr < ptrend && MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype) != 0)
{
- if (is_group && IS_DIGIT(*ptr))
+ ptr++;
+ namelen++;
+ if (namelen > MAX_NAME_SIZE)
{
- *errorcodeptr = ERR44;
+ *errorcodeptr = ERR48;
goto FAILED;
}
-
- while (ptr < ptrend && MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0)
- {
- ptr++;
- }
}
-/* Check name length */
-
-if (ptr > *nameptr + MAX_NAME_SIZE)
- {
- *errorcodeptr = ERR48;
- goto FAILED;
- }
-*namelenptr = ptr - *nameptr;
-
/* Subpattern names must not be empty, and their terminator is checked here.
-(What follows a verb or alpha assertion name is checked separately.) */
+(What follows a verb name is checked separately.) */
-if (is_group)
+if (!is_verb)
{
- if (ptr == *nameptr)
+ if (namelen == 0)
{
*errorcodeptr = ERR62; /* Subpattern name expected */
goto FAILED;
@@ -2342,6 +2196,7 @@ if (is_group)
ptr++;
}
+*namelenptr = namelen;
*ptrptr = ptr;
return TRUE;
@@ -2434,7 +2289,6 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
-#define NSF_ATOMICSR 0x0004u
/* Options that are changeable within the pattern must be tracked during
parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
@@ -2479,7 +2333,6 @@ uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
-uint32_t extra_options = cb->cx->extra_options;
uint16_t nest_depth = 0;
int after_manual_callout = 0;
int expect_cond_assert = 0;
@@ -2503,12 +2356,12 @@ nest_save *top_nest, *end_nests;
/* Insert leading items for word and line matching (features provided for the
benefit of pcre2grep). */
-if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
+if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
{
*parsed_pattern++ = META_CIRCUMFLEX;
*parsed_pattern++ = META_NOCAPTURE;
}
-else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
+else if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
{
*parsed_pattern++ = META_ESCAPE + ESC_b;
*parsed_pattern++ = META_NOCAPTURE;
@@ -2673,7 +2526,7 @@ while (ptr < ptrend)
if ((options & PCRE2_ALT_VERBNAMES) != 0)
{
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
- cb->cx->extra_options, FALSE, cb);
+ FALSE, cb);
if (errorcode != 0) goto FAILED;
}
else escape = 0; /* Treat all as literal */
@@ -2786,30 +2639,23 @@ while (ptr < ptrend)
if (expect_cond_assert > 0)
{
BOOL ok = c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 3 &&
- (ptr[0] == CHAR_QUESTION_MARK || ptr[0] == CHAR_ASTERISK);
- if (ok)
+ ptr[0] == CHAR_QUESTION_MARK;
+ if (ok) switch(ptr[1])
{
- if (ptr[0] == CHAR_ASTERISK) /* New alpha assertion format, possibly */
- {
- ok = MAX_255(ptr[1]) && (cb->ctypes[ptr[1]] & ctype_lcletter) != 0;
- }
- else switch(ptr[1]) /* Traditional symbolic format */
- {
- case CHAR_C:
- ok = expect_cond_assert == 2;
- break;
+ case CHAR_C:
+ ok = expect_cond_assert == 2;
+ break;
- case CHAR_EQUALS_SIGN:
- case CHAR_EXCLAMATION_MARK:
- break;
+ case CHAR_EQUALS_SIGN:
+ case CHAR_EXCLAMATION_MARK:
+ break;
- case CHAR_LESS_THAN_SIGN:
- ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
- break;
+ case CHAR_LESS_THAN_SIGN:
+ ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
+ break;
- default:
- ok = FALSE;
- }
+ default:
+ ok = FALSE;
}
if (!ok)
@@ -2863,11 +2709,11 @@ while (ptr < ptrend)
case CHAR_BACKSLASH:
tempptr = ptr;
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
- cb->cx->extra_options, FALSE, cb);
+ FALSE, cb);
if (errorcode != 0)
{
ESCAPE_FAILED:
- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
+ if ((cb->cx->extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
goto FAILED;
ptr = tempptr;
if (ptr >= ptrend) c = CHAR_BACKSLASH; else
@@ -3061,7 +2907,7 @@ while (ptr < ptrend)
/* Not a numerical recursion */
- if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen,
+ if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen,
&errorcode, cb)) goto ESCAPE_FAILED;
/* \k and \g when used with braces are back references, whereas \g used
@@ -3424,12 +3270,12 @@ while (ptr < ptrend)
else
{
tempptr = ptr;
- escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
- cb->cx->extra_options, TRUE, cb);
-
+ escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
+ options, TRUE, cb);
if (errorcode != 0)
{
- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
+ CLASS_ESCAPE_FAILED:
+ if ((cb->cx->extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
goto FAILED;
ptr = tempptr;
if (ptr >= ptrend) c = CHAR_BACKSLASH; else
@@ -3439,32 +3285,30 @@ while (ptr < ptrend)
escape = 0; /* Treat as literal character */
}
- switch(escape)
+ if (escape == 0) /* Escaped character code point is in c */
{
- case 0: /* Escaped character code point is in c */
char_is_literal = FALSE;
goto CLASS_LITERAL;
+ }
+
+ /* These three escapes do not alter the class range state. */
- case ESC_b:
- c = CHAR_BS; /* \b is backspace in a class */
+ if (escape == ESC_b)
+ {
+ c = CHAR_BS; /* \b is backspace in a class */
char_is_literal = FALSE;
goto CLASS_LITERAL;
+ }
- case ESC_Q:
+ else if (escape == ESC_Q)
+ {
inescq = TRUE; /* Enter literal mode */
goto CLASS_CONTINUE;
+ }
- case ESC_E: /* Ignore orphan \E */
+ else if (escape == ESC_E) /* Ignore orphan \E */
goto CLASS_CONTINUE;
- case ESC_B: /* Always an error in a class */
- case ESC_R:
- case ESC_X:
- errorcode = ERR7;
- ptr--;
- goto FAILED;
- }
-
/* The second part of a range can be a single-character escape
sequence (detected above), but not any of the other escapes. Perl
treats a hyphen as a literal in such circumstances. However, in Perl's
@@ -3474,7 +3318,7 @@ while (ptr < ptrend)
if (class_range_state == RANGE_STARTED)
{
errorcode = ERR50;
- goto FAILED; /* Not CLASS_ESCAPE_FAILED; always an error */
+ goto CLASS_ESCAPE_FAILED;
}
/* Of the remaining escapes, only those that define characters are
@@ -3484,8 +3328,8 @@ while (ptr < ptrend)
switch(escape)
{
case ESC_N:
- errorcode = ERR71;
- goto FAILED;
+ errorcode = ERR71; /* Not supported in a class */
+ goto CLASS_ESCAPE_FAILED;
case ESC_H:
case ESC_h:
@@ -3548,14 +3392,14 @@ while (ptr < ptrend)
}
#else
errorcode = ERR45;
- goto FAILED;
+ goto CLASS_ESCAPE_FAILED;
#endif
break; /* End \P and \p */
default: /* All others are not allowed in a class */
errorcode = ERR7;
ptr--;
- goto FAILED;
+ goto CLASS_ESCAPE_FAILED;
}
/* Perl gives a warning unless a following hyphen is the last character
@@ -3596,8 +3440,7 @@ while (ptr < ptrend)
case CHAR_LEFT_PARENTHESIS:
if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
- /* If ( is not followed by ? it is either a capture or a special verb or an
- alpha assertion. */
+ /* If ( is not followed by ? it is either a capture or a special verb. */
if (*ptr != CHAR_QUESTION_MARK)
{
@@ -3617,122 +3460,17 @@ while (ptr < ptrend)
else *parsed_pattern++ = META_NOCAPTURE;
}
- /* Do nothing for (* followed by end of pattern or ) so it gives a "bad
- quantifier" error rather than "(*MARK) must have an argument". */
-
- else if (ptrend - ptr <= 1 || (c = ptr[1]) == CHAR_RIGHT_PARENTHESIS)
- break;
-
- /* Handle "alpha assertions" such as (*pla:...). Most of these are
- synonyms for the historical symbolic assertions, but the script run ones
- are new. They are distinguished by starting with a lower case letter.
- Checking both ends of the alphabet makes this work in all character
- codes. */
-
- else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
- {
- uint32_t meta;
-
- vn = alasnames;
- if (!read_name(&ptr, ptrend, utf, 0, &offset, &name, &namelen,
- &errorcode, cb)) goto FAILED;
- if (ptr >= ptrend || *ptr != CHAR_COLON)
- {
- errorcode = ERR95; /* Malformed */
- goto FAILED;
- }
-
- /* Scan the table of alpha assertion names */
-
- for (i = 0; i < alascount; i++)
- {
- if (namelen == alasmeta[i].len &&
- PRIV(strncmp_c8)(name, vn, namelen) == 0)
- break;
- vn += alasmeta[i].len + 1;
- }
-
- if (i >= alascount)
- {
- errorcode = ERR95; /* Alpha assertion not recognized */
- goto FAILED;
- }
-
- /* Check for expecting an assertion condition. If so, only lookaround
- assertions are valid. */
-
- meta = alasmeta[i].meta;
- if (prev_expect_cond_assert > 0 &&
- (meta < META_LOOKAHEAD || meta > META_LOOKBEHINDNOT))
- {
- errorcode = ERR28; /* Assertion expected */
- goto FAILED;
- }
-
- /* The lookaround alphabetic synonyms can be almost entirely handled by
- jumping to the code that handles the traditional symbolic forms. */
-
- switch(meta)
- {
- default:
- errorcode = ERR89; /* Unknown code; should never occur because */
- goto FAILED; /* the meta values come from a table above. */
-
- case META_ATOMIC:
- goto ATOMIC_GROUP;
-
- case META_LOOKAHEAD:
- goto POSITIVE_LOOK_AHEAD;
-
- case META_LOOKAHEADNOT:
- goto NEGATIVE_LOOK_AHEAD;
-
- case META_LOOKBEHIND:
- case META_LOOKBEHINDNOT:
- *parsed_pattern++ = meta;
- ptr--;
- goto POST_LOOKBEHIND;
-
- /* The script run facilities are handled here. Unicode support is
- required (give an error if not, as this is a security issue). Always
- record a META_SCRIPT_RUN item. Then, for the atomic version, insert
- META_ATOMIC and remember that we need two META_KETs at the end. */
-
- case META_SCRIPT_RUN:
- case META_ATOMIC_SCRIPT_RUN:
-#ifdef SUPPORT_UNICODE
- *parsed_pattern++ = META_SCRIPT_RUN;
- nest_depth++;
- ptr++;
- if (meta == META_ATOMIC_SCRIPT_RUN)
- {
- *parsed_pattern++ = META_ATOMIC;
- if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace);
- else if (++top_nest >= end_nests)
- {
- errorcode = ERR84;
- goto FAILED;
- }
- top_nest->nest_depth = nest_depth;
- top_nest->flags = NSF_ATOMICSR;
- top_nest->options = options & PARSE_TRACKED_OPTIONS;
- }
- break;
-#else /* SUPPORT_UNICODE */
- errorcode = ERR96;
- goto FAILED;
-#endif
- }
- }
-
/* ---- Handle (*VERB) and (*VERB:NAME) ---- */
- else
+ /* Do nothing for (*) so it gives a "bad quantifier" error rather than
+ "(*MARK) must have an argument". */
+
+ else if (ptrend - ptr > 1 && ptr[1] != CHAR_RIGHT_PARENTHESIS)
{
vn = verbnames;
- if (!read_name(&ptr, ptrend, utf, 0, &offset, &name, &namelen,
- &errorcode, cb)) goto FAILED;
+ if (!read_name(&ptr, ptrend, 0, &offset, &name, &namelen, &errorcode,
+ cb)) goto FAILED;
if (ptr >= ptrend || (*ptr != CHAR_COLON &&
*ptr != CHAR_RIGHT_PARENTHESIS))
{
@@ -3987,7 +3725,7 @@ while (ptr < ptrend)
errorcode = ERR41;
goto FAILED;
}
- if (!read_name(&ptr, ptrend, utf, CHAR_RIGHT_PARENTHESIS, &offset, &name,
+ if (!read_name(&ptr, ptrend, CHAR_RIGHT_PARENTHESIS, &offset, &name,
&namelen, &errorcode, cb)) goto FAILED;
*parsed_pattern++ = META_BACKREF_BYNAME;
*parsed_pattern++ = namelen;
@@ -4047,7 +3785,7 @@ while (ptr < ptrend)
case CHAR_AMPERSAND:
RECURSE_BY_NAME:
- if (!read_name(&ptr, ptrend, utf, CHAR_RIGHT_PARENTHESIS, &offset, &name,
+ if (!read_name(&ptr, ptrend, CHAR_RIGHT_PARENTHESIS, &offset, &name,
&namelen, &errorcode, cb)) goto FAILED;
*parsed_pattern++ = META_RECURSE_BYNAME;
*parsed_pattern++ = namelen;
@@ -4195,15 +3933,14 @@ while (ptr < ptrend)
if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
nest_depth++;
- /* If the next character is ? or * there must be an assertion next
- (optionally preceded by a callout). We do not check this here, but
- instead we set expect_cond_assert to 2. If this is still greater than
- zero (callouts decrement it) when the next assertion is read, it will be
- marked as a condition that must not be repeated. A value greater than
- zero also causes checking that an assertion (possibly with callout)
- follows. */
+ /* If the next character is ? there must be an assertion next (optionally
+ preceded by a callout). We do not check this here, but instead we set
+ expect_cond_assert to 2. If this is still greater than zero (callouts
+ decrement it) when the next assertion is read, it will be marked as a
+ condition that must not be repeated. A value greater than zero also
+ causes checking that an assertion (possibly with callout) follows. */
- if (*ptr == CHAR_QUESTION_MARK || *ptr == CHAR_ASTERISK)
+ if (*ptr == CHAR_QUESTION_MARK)
{
*parsed_pattern++ = META_COND_ASSERT;
ptr--; /* Pull pointer back to the opening parenthesis. */
@@ -4295,7 +4032,7 @@ while (ptr < ptrend)
terminator = CHAR_RIGHT_PARENTHESIS;
ptr--; /* Point to char before name */
}
- if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen,
+ if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen,
&errorcode, cb)) goto FAILED;
/* Handle (?(R&name) */
@@ -4349,7 +4086,6 @@ while (ptr < ptrend)
/* ---- Atomic group ---- */
case CHAR_GREATER_THAN_SIGN:
- ATOMIC_GROUP: /* Come from (*atomic: */
*parsed_pattern++ = META_ATOMIC;
nest_depth++;
ptr++;
@@ -4359,13 +4095,11 @@ while (ptr < ptrend)
/* ---- Lookahead assertions ---- */
case CHAR_EQUALS_SIGN:
- POSITIVE_LOOK_AHEAD: /* Come from (*pla: */
*parsed_pattern++ = META_LOOKAHEAD;
ptr++;
goto POST_ASSERTION;
case CHAR_EXCLAMATION_MARK:
- NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */
*parsed_pattern++ = META_LOOKAHEADNOT;
ptr++;
goto POST_ASSERTION;
@@ -4385,8 +4119,6 @@ while (ptr < ptrend)
}
*parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
META_LOOKBEHIND : META_LOOKBEHINDNOT;
-
- POST_LOOKBEHIND: /* Come from (*plb: and (*nlb: */
*has_lookbehind = TRUE;
offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
PUTOFFSET(offset, parsed_pattern);
@@ -4429,7 +4161,7 @@ while (ptr < ptrend)
terminator = CHAR_APOSTROPHE; /* Terminator */
DEFINE_NAME:
- if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen,
+ if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen,
&errorcode, cb)) goto FAILED;
/* We have a name for this capturing group. It is also assigned a number,
@@ -4548,14 +4280,6 @@ while (ptr < ptrend)
cb->bracount = top_nest->max_group;
if ((top_nest->flags & NSF_CONDASSERT) != 0)
okquantifier = FALSE;
-
- if ((top_nest->flags & NSF_ATOMICSR) != 0)
- {
- *parsed_pattern++ = META_KET;
- }
-
-
-
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
@@ -4587,12 +4311,12 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout,
/* Insert trailing items for word and line matching (features provided for the
benefit of pcre2grep). */
-if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
+if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
{
*parsed_pattern++ = META_KET;
*parsed_pattern++ = META_DOLLAR;
}
-else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
+else if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
{
*parsed_pattern++ = META_KET;
*parsed_pattern++ = META_ESCAPE + ESC_b;
@@ -4697,14 +4421,6 @@ for (;;)
code += GET(code, 1) + 1 + LINK_SIZE;
break;
- case OP_MARK:
- case OP_COMMIT_ARG:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- code += code[1] + PRIV(OP_lengths)[*code];
- break;
-
default:
return code;
}
@@ -5800,10 +5516,10 @@ for (;; pptr++)
if (range_is_literal &&
(cb->ctypes[c] & ctype_letter) != 0 &&
(cb->ctypes[d] & ctype_letter) != 0 &&
- (c <= CHAR_z) == (d <= CHAR_z))
+ (d <= CHAR_z) == (d <= CHAR_z))
{
uint32_t uc = (d <= CHAR_z)? 0 : 64;
- uint32_t C = c - uc;
+ uint32_t C = d - uc;
uint32_t D = d - uc;
if (C <= CHAR_i)
@@ -5948,10 +5664,7 @@ for (;; pptr++)
(void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop)
- {
- /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
- for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
- }
+ for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
memcpy(code, classbits, 32);
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
}
@@ -5974,10 +5687,7 @@ for (;; pptr++)
if (lengthptr == NULL) /* Save time in the pre-compile phase */
{
if (negate_class)
- {
- /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
- for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
- }
+ for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
memcpy(code, classbits, 32);
}
code += 32 / sizeof(PCRE2_UCHAR);
@@ -6191,7 +5901,7 @@ for (;; pptr++)
}
goto GROUP_PROCESS_NOTE_EMPTY;
- /* The DEFINE condition is always false. Its internal groups may never
+ /* The DEFINE condition is always false. Its internal groups may never
be called, so matched_char must remain false, hence the jump to
GROUP_PROCESS rather than GROUP_PROCESS_NOTE_EMPTY. */
@@ -6287,10 +5997,6 @@ for (;; pptr++)
bravalue = OP_ONCE;
goto GROUP_PROCESS_NOTE_EMPTY;
- case META_SCRIPT_RUN:
- bravalue = OP_SCRIPT_RUN;
- goto GROUP_PROCESS_NOTE_EMPTY;
-
case META_NOCAPTURE:
bravalue = OP_BRA;
/* Fall through */
@@ -6531,8 +6237,8 @@ for (;; pptr++)
groupnumber = ng->number;
/* For a recursion, that's all that is needed. We can now go to
- the code that handles numerical recursion, applying it to the first
- group with the given name. */
+ the code above that handles numerical recursion, applying it to
+ the first group with the given name. */
if (meta == META_RECURSE_BYNAME)
{
@@ -6926,7 +6632,6 @@ for (;; pptr++)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -7139,16 +6844,16 @@ for (;; pptr++)
}
/* If the maximum is unlimited, set a repeater in the final copy. For
- SCRIPT_RUN and ONCE brackets, that's all we need to do. However,
- possessively repeated ONCE brackets can be converted into non-capturing
- brackets, as the behaviour of (?:xx)++ is the same as (?>xx)++ and this
- saves having to deal with possessive ONCEs specially.
+ ONCE brackets, that's all we need to do. However, possessively repeated
+ ONCE brackets can be converted into non-capturing brackets, as the
+ behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
+ deal with possessive ONCEs specially.
Otherwise, when we are doing the actual compile phase, check to see
whether this group is one that could match an empty string. If so,
convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
that runtime checking can be done. [This check is also applied to ONCE
- and SCRIPT_RUN groups at runtime, but in a different way.]
+ groups at runtime, but in a different way.]
Then, if the quantifier was possessive and the bracket is not a
conditional, we convert the BRA code to the POS form, and the KET code to
@@ -7172,14 +6877,13 @@ for (;; pptr++)
if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
- /* For non-possessive ONCE and for SCRIPT_RUN brackets, all we need
- to do is to set the KET. */
+ /* For non-possessive ONCE brackets, all we need to do is to
+ set the KET. */
- if (*bracode == OP_ONCE || *bracode == OP_SCRIPT_RUN)
- *ketcode = OP_KETRMAX + repeat_type;
+ if (*bracode == OP_ONCE) *ketcode = OP_KETRMAX + repeat_type;
- /* Handle non-SCRIPT_RUN and non-ONCE brackets and possessive ONCEs
- (which have been converted to non-capturing above). */
+ /* Handle non-ONCE brackets and possessive ONCEs (which have been
+ converted to non-capturing above). */
else
{
@@ -7563,8 +7267,9 @@ for (;; pptr++)
scanned and these numbers are replaced by offsets within the pattern. It is
done like this to avoid problems with forward references and adjusting
offsets when groups are duplicated and moved (as discovered in previous
- implementations). Note that a recursion does not have a set first
- character. */
+ implementations). Note that a recursion does not have a set first character
+ (relevant if it is repeated, because it will then be wrapped with ONCE
+ brackets). */
case META_RECURSE:
GETPLUSOFFSET(offset, pptr);
@@ -7581,8 +7286,6 @@ for (;; pptr++)
groupsetfirstcu = FALSE;
cb->had_recurse = TRUE;
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- zerofirstcu = firstcu;
- zerofirstcuflags = firstcuflags;
break;
@@ -7637,20 +7340,9 @@ for (;; pptr++)
{
uint32_t ptype = *(++pptr) >> 16;
uint32_t pdata = *pptr & 0xffff;
-
- /* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit
- from the auto-anchoring code. */
-
- if (meta_arg == ESC_p && ptype == PT_ANY)
- {
- *code++ = OP_ALLANY;
- }
- else
- {
- *code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
- *code++ = ptype;
- *code++ = pdata;
- }
+ *code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
+ *code++ = ptype;
+ *code++ = pdata;
break; /* End META_ESCAPE */
}
#endif
@@ -8548,7 +8240,6 @@ do {
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
d = find_firstassertedcu(scode, &dflags, inassert + ((op==OP_ASSERT)?1:0));
if (dflags < 0)
return 0;
@@ -8748,7 +8439,6 @@ for (;; pptr++)
case META_LOOKBEHIND:
case META_LOOKBEHINDNOT:
case META_NOCAPTURE:
- case META_SCRIPT_RUN:
nestlevel++;
break;
@@ -9161,7 +8851,6 @@ for (;; pptr++)
case META_ATOMIC:
case META_NOCAPTURE:
- case META_SCRIPT_RUN:
pptr++;
CHECK_GROUP:
grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group,
@@ -9341,7 +9030,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_QUERY_QUERY:
case META_RANGE_ESCAPED:
case META_RANGE_LITERAL:
- case META_SCRIPT_RUN:
case META_SKIP:
case META_THEN:
break;
diff --git a/dist2/src/pcre2_context.c b/dist2/src/pcre2_context.c
index 9c2886a6..2c14df00 100644
--- a/dist2/src/pcre2_context.c
+++ b/dist2/src/pcre2_context.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -163,13 +163,11 @@ when no context is supplied to a match function. */
const pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL },
#ifdef SUPPORT_JIT
- NULL, /* JIT callback */
- NULL, /* JIT callback data */
+ NULL,
+ NULL,
#endif
- NULL, /* Callout function */
- NULL, /* Callout data */
- NULL, /* Substitute callout function */
- NULL, /* Substitute callout data */
+ NULL,
+ NULL,
PCRE2_UNSET, /* Offset limit */
HEAP_LIMIT,
MATCH_LIMIT,
@@ -406,16 +404,6 @@ return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_set_substitute_callout(pcre2_match_context *mcontext,
- int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
- void *substitute_callout_data)
-{
-mcontext->substitute_callout = substitute_callout;
-mcontext->substitute_callout_data = substitute_callout_data;
-return 0;
-}
-
-PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->heap_limit = limit;
diff --git a/dist2/src/pcre2_convert.c b/dist2/src/pcre2_convert.c
index d45b6fee..1dd5c337 100644
--- a/dist2/src/pcre2_convert.c
+++ b/dist2/src/pcre2_convert.c
@@ -276,7 +276,7 @@ while (plength > 0)
break;
case CHAR_BACKSLASH:
- if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
+ if (plength <= 0) return PCRE2_ERROR_END_BACKSLASH;
if (extended) nextisliteral = TRUE; else
{
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
diff --git a/dist2/src/pcre2_dfa_match.c b/dist2/src/pcre2_dfa_match.c
index bbf3e210..9b43237d 100644
--- a/dist2/src/pcre2_dfa_match.c
+++ b/dist2/src/pcre2_dfa_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -85,8 +85,7 @@ in others, so I abandoned this code. */
#define PUBLIC_DFA_MATCH_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
- PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART| \
- PCRE2_COPY_MATCHED_SUBJECT)
+ PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART)
/*************************************************
@@ -174,7 +173,6 @@ static const uint8_t coptable[] = {
0, /* Assert behind */
0, /* Assert behind not */
0, /* ONCE */
- 0, /* SCRIPT_RUN */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@@ -249,7 +247,6 @@ static const uint8_t poptable[] = {
0, /* Assert behind */
0, /* Assert behind not */
0, /* ONCE */
- 0, /* SCRIPT_RUN */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@@ -319,8 +316,8 @@ finding the minimum heap requirement for a match. */
typedef struct RWS_anchor {
struct RWS_anchor *next;
- uint32_t size; /* Number of ints */
- uint32_t free; /* Number of ints */
+ unsigned int size; /* Number of ints */
+ unsigned int free; /* Number of ints */
} RWS_anchor;
#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
@@ -416,24 +413,20 @@ if (rws->next != NULL)
new = rws->next;
}
-/* Sizes in the RWS_anchor blocks are in units of sizeof(int), but
-mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
-overflow. */
+/* All sizes are in units of sizeof(int), except for mb->heap_limit, which is in
+kibibytes. */
else
{
- uint32_t newsize = (rws->size >= UINT32_MAX/2)? UINT32_MAX/2 : rws->size * 2;
- uint32_t newsizeK = newsize/(1024/sizeof(int));
-
- if (newsizeK + mb->heap_used > mb->heap_limit)
- newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
- newsize = newsizeK*(1024/sizeof(int));
-
+ unsigned int newsize = rws->size * 2;
+ unsigned int heapleft = (unsigned int)
+ (((1024/sizeof(int))*mb->heap_limit - mb->heap_used));
+ if (newsize > heapleft) newsize = heapleft;
if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
return PCRE2_ERROR_HEAPLIMIT;
new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
- mb->heap_used += newsizeK;
+ mb->heap_used += newsize;
new->next = NULL;
new->size = newsize;
rws->next = new;
@@ -2567,7 +2560,7 @@ for (;;)
if (clen > 0)
{
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
- ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
+ ((((uint8_t *)(code + 1))[c/8] & (1 << (c&7))) != 0);
}
}
@@ -2760,7 +2753,7 @@ for (;;)
/* There is also an always-true condition */
else if (condcode == OP_TRUE)
- { ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); }
+ { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
/* The only supported version of OP_RREF is for the value RREF_ANY,
which means "test if in any recursion". We can't test for specifically
@@ -3233,8 +3226,6 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
{
int rc;
-int was_zero_terminated = 0;
-
const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match;
@@ -3274,11 +3265,7 @@ rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
-if (length == PCRE2_ZERO_TERMINATED)
- {
- length = PRIV(strlen)(subject);
- was_zero_terminated = 1;
- }
+if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */
@@ -3531,20 +3518,10 @@ if ((re->flags & PCRE2_LASTSET) != 0)
}
}
-/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
-free the memory that was obtained. */
-
-if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
- {
- match_data->memctl.free((void *)match_data->subject,
- match_data->memctl.memory_data);
- match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
- }
-
/* Fill in fields that are always returned in the match data. */
match_data->code = re;
-match_data->subject = NULL; /* Default for no match */
+match_data->subject = subject;
match_data->mark = NULL;
match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
@@ -3609,7 +3586,7 @@ for (;;)
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 255) c = 255;
#endif
- ok = (start_bits[c/8] & (1u << (c&7))) != 0;
+ ok = (start_bits[c/8] & (1 << (c&7))) != 0;
}
}
if (!ok) break;
@@ -3720,7 +3697,7 @@ for (;;)
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 255) c = 255;
#endif
- if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
start_match++;
}
@@ -3839,20 +3816,6 @@ for (;;)
match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
-
- if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
- {
- length = CU2BYTES(length + was_zero_terminated);
- match_data->subject = match_data->memctl.malloc(length,
- match_data->memctl.memory_data);
- if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
- memcpy((void *)match_data->subject, subject, length);
- match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
- }
- else
- {
- if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
- }
goto EXIT;
}
diff --git a/dist2/src/pcre2_error.c b/dist2/src/pcre2_error.c
index 1d02cf14..4b3b3f1b 100644
--- a/dist2/src/pcre2_error.c
+++ b/dist2/src/pcre2_error.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -71,7 +71,7 @@ static const unsigned char compile_error_texts[] =
/* 5 */
"number too big in {} quantifier\0"
"missing terminating ] for character class\0"
- "escape sequence is invalid in character class\0"
+ "invalid escape sequence in character class\0"
"range out of order in character class\0"
"quantifier does not follow a repeatable item\0"
/* 10 */
@@ -95,7 +95,7 @@ static const unsigned char compile_error_texts[] =
/* 25 */
"lookbehind assertion is not fixed length\0"
"a relative value of zero is not allowed\0"
- "conditional subpattern contains more than two branches\0"
+ "conditional group contains more than two branches\0"
"assertion expected after (?( or (?(?C)\0"
"digit expected after (?+ or (?-\0"
/* 30 */
@@ -113,21 +113,21 @@ static const unsigned char compile_error_texts[] =
/* 40 */
"invalid escape sequence in (*VERB) name\0"
"unrecognized character after (?P\0"
- "syntax error in subpattern name (missing terminator?)\0"
+ "syntax error in subpattern name (missing terminator)\0"
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
- "subpattern name must start with a non-digit\0"
+ "group name must start with a non-digit\0"
/* 45 */
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
"malformed \\P or \\p sequence\0"
"unknown property name after \\P or \\p\0"
- "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
+ "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
/* 50 */
"invalid range in character class\0"
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
"internal error: overran compiling workspace\0"
"internal error: previously-checked referenced subpattern not found\0"
- "DEFINE subpattern contains more than one branch\0"
+ "DEFINE group contains more than one branch\0"
/* 55 */
"missing opening brace after \\o\0"
"internal error: unknown newline setting\0"
@@ -137,7 +137,7 @@ static const unsigned char compile_error_texts[] =
"obsolete error (should not occur)\0" /* Was the above */
/* 60 */
"(*VERB) not recognized or malformed\0"
- "subpattern number is too big\0"
+ "group number is too big\0"
"subpattern name expected\0"
"internal error: parsed pattern overflow\0"
"non-octal character in \\o{} (closing brace missing?)\0"
@@ -181,9 +181,6 @@ static const unsigned char compile_error_texts[] =
"invalid option bits with PCRE2_LITERAL\0"
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
"invalid hyphen in option setting\0"
- /* 95 */
- "(*alpha_assertion) not recognized\0"
- "script runs require Unicode support, which this version of PCRE2 does not have\0"
;
/* Match-time and UTF error texts are in the same format. */
diff --git a/dist2/src/pcre2_extuni.c b/dist2/src/pcre2_extuni.c
index 5a719e9c..237211ab 100644
--- a/dist2/src/pcre2_extuni.c
+++ b/dist2/src/pcre2_extuni.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -100,7 +100,7 @@ while (eptr < end_subject)
int len = 1;
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
rgb = UCD_GRAPHBREAK(c);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
diff --git a/dist2/src/pcre2_internal.h b/dist2/src/pcre2_internal.h
index 814d91bd..8750f2f1 100644
--- a/dist2/src/pcre2_internal.h
+++ b/dist2/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -148,7 +148,16 @@ pcre2_match() because of the way it backtracks. */
/* When checking for integer overflow in pcre2_compile(), we need to handle
large integers. If a 64-bit integer type is available, we can use that.
Otherwise we have to cast to double, which of course requires floating point
-arithmetic. Handle this by defining a macro for the appropriate type. */
+arithmetic. Handle this by defining a macro for the appropriate type. If
+stdint.h is available, include it; it may define INT64_MAX. Systems that do not
+have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
+by "configure". */
+
+#if defined HAVE_STDINT_H
+#include <stdint.h>
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
#if defined INT64_MAX || defined int64_t
#define INT64_OR_DOUBLE int64_t
@@ -526,10 +535,6 @@ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
-/* Values for the flags field in a match data block. */
-
-#define PCRE2_MD_COPIED_SUBJECT 0x01u
-
/* Magic number to provide a small check against being handed junk. */
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
@@ -564,11 +569,11 @@ these tables. */
without checking pcre2_jit_compile.c, which has an assertion to ensure that
ctype_word has the value 16. */
-#define ctype_space 0x01
-#define ctype_letter 0x02
-#define ctype_lcletter 0x04
-#define ctype_digit 0x08
-#define ctype_word 0x10 /* alphanumeric or '_' */
+#define ctype_space 0x01
+#define ctype_letter 0x02
+#define ctype_digit 0x04
+#define ctype_xdigit 0x08 /* not actually used any more */
+#define ctype_word 0x10 /* alphanumeric or '_' */
/* Offsets of the various tables from the base tables pointer, and
total length of the tables. */
@@ -869,48 +874,34 @@ a positive value. */
#define STR_RIGHT_CURLY_BRACKET "}"
#define STR_TILDE "~"
-#define STRING_ACCEPT0 "ACCEPT\0"
-#define STRING_COMMIT0 "COMMIT\0"
-#define STRING_F0 "F\0"
-#define STRING_FAIL0 "FAIL\0"
-#define STRING_MARK0 "MARK\0"
-#define STRING_PRUNE0 "PRUNE\0"
-#define STRING_SKIP0 "SKIP\0"
-#define STRING_THEN "THEN"
-
-#define STRING_atomic0 "atomic\0"
-#define STRING_pla0 "pla\0"
-#define STRING_plb0 "plb\0"
-#define STRING_nla0 "nla\0"
-#define STRING_nlb0 "nlb\0"
-#define STRING_sr0 "sr\0"
-#define STRING_asr0 "asr\0"
-#define STRING_positive_lookahead0 "positive_lookahead\0"
-#define STRING_positive_lookbehind0 "positive_lookbehind\0"
-#define STRING_negative_lookahead0 "negative_lookahead\0"
-#define STRING_negative_lookbehind0 "negative_lookbehind\0"
-#define STRING_script_run0 "script_run\0"
-#define STRING_atomic_script_run "atomic_script_run"
-
-#define STRING_alpha0 "alpha\0"
-#define STRING_lower0 "lower\0"
-#define STRING_upper0 "upper\0"
-#define STRING_alnum0 "alnum\0"
-#define STRING_ascii0 "ascii\0"
-#define STRING_blank0 "blank\0"
-#define STRING_cntrl0 "cntrl\0"
-#define STRING_digit0 "digit\0"
-#define STRING_graph0 "graph\0"
-#define STRING_print0 "print\0"
-#define STRING_punct0 "punct\0"
-#define STRING_space0 "space\0"
-#define STRING_word0 "word\0"
-#define STRING_xdigit "xdigit"
-
-#define STRING_DEFINE "DEFINE"
-#define STRING_VERSION "VERSION"
-#define STRING_WEIRD_STARTWORD "[:<:]]"
-#define STRING_WEIRD_ENDWORD "[:>:]]"
+#define STRING_ACCEPT0 "ACCEPT\0"
+#define STRING_COMMIT0 "COMMIT\0"
+#define STRING_F0 "F\0"
+#define STRING_FAIL0 "FAIL\0"
+#define STRING_MARK0 "MARK\0"
+#define STRING_PRUNE0 "PRUNE\0"
+#define STRING_SKIP0 "SKIP\0"
+#define STRING_THEN "THEN"
+
+#define STRING_alpha0 "alpha\0"
+#define STRING_lower0 "lower\0"
+#define STRING_upper0 "upper\0"
+#define STRING_alnum0 "alnum\0"
+#define STRING_ascii0 "ascii\0"
+#define STRING_blank0 "blank\0"
+#define STRING_cntrl0 "cntrl\0"
+#define STRING_digit0 "digit\0"
+#define STRING_graph0 "graph\0"
+#define STRING_print0 "print\0"
+#define STRING_punct0 "punct\0"
+#define STRING_space0 "space\0"
+#define STRING_word0 "word\0"
+#define STRING_xdigit "xdigit"
+
+#define STRING_DEFINE "DEFINE"
+#define STRING_VERSION "VERSION"
+#define STRING_WEIRD_STARTWORD "[:<:]]"
+#define STRING_WEIRD_ENDWORD "[:>:]]"
#define STRING_CR_RIGHTPAR "CR)"
#define STRING_LF_RIGHTPAR "LF)"
@@ -1159,48 +1150,34 @@ only. */
#define STR_RIGHT_CURLY_BRACKET "\175"
#define STR_TILDE "\176"
-#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
-#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
-#define STRING_F0 STR_F "\0"
-#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
-#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
-#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
-#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
-#define STRING_THEN STR_T STR_H STR_E STR_N
-
-#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
-#define STRING_pla0 STR_p STR_l STR_a "\0"
-#define STRING_plb0 STR_p STR_l STR_b "\0"
-#define STRING_nla0 STR_n STR_l STR_a "\0"
-#define STRING_nlb0 STR_n STR_l STR_b "\0"
-#define STRING_sr0 STR_s STR_r "\0"
-#define STRING_asr0 STR_a STR_s STR_r "\0"
-#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
-#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
-#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
-#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
-#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
-#define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
-
-#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
-#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
-#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
-#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
-#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
-#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
-#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
-#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
-#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
-#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
-#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
-#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
-#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
-#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
-
-#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
-#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
-#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
-#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
+#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
+#define STRING_F0 STR_F "\0"
+#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
+#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
+#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
+#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
+#define STRING_THEN STR_T STR_H STR_E STR_N
+
+#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
+#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
+#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
+#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
+#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
+#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
+#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
+#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
+#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
+#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
+#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
+#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
+#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
+
+#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
+#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
+#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
@@ -1508,71 +1485,70 @@ enum {
OP_ASSERTBACK, /* 128 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
- /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
- immediately after the assertions, with ONCE first, as there's a test for >=
- ONCE for a subpattern that isn't an assertion. The POS versions must
- immediately follow the non-POS versions in each case. */
+ /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
+ assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
+ that isn't an assertion. The POS versions must immediately follow the non-POS
+ versions in each case. */
OP_ONCE, /* 130 Atomic group, contains captures */
- OP_SCRIPT_RUN, /* 131 Non-capture, but check characters' scripts */
- OP_BRA, /* 132 Start of non-capturing bracket */
- OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 134 Start of capturing bracket */
- OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 136 Conditional group */
+ OP_BRA, /* 131 Start of non-capturing bracket */
+ OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 133 Start of capturing bracket */
+ OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 139 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 141 Conditional group, check empty */
+ OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 138 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 142 Used to hold a capture number as condition */
- OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
- OP_RREF, /* 144 Used to hold a recursion number as condition */
- OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
- OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
- OP_TRUE, /* 147 Always true (used by VERSION) */
+ OP_CREF, /* 141 Used to hold a capture number as condition */
+ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
+ OP_RREF, /* 143 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
+ OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
+ OP_TRUE, /* 146 Always true (used by VERSION) */
- OP_BRAZERO, /* 148 These two must remain together and in this */
- OP_BRAMINZERO, /* 149 order. */
- OP_BRAPOSZERO, /* 150 */
+ OP_BRAZERO, /* 147 These two must remain together and in this */
+ OP_BRAMINZERO, /* 148 order. */
+ OP_BRAPOSZERO, /* 149 */
/* These are backtracking control verbs */
- OP_MARK, /* 151 always has an argument */
- OP_PRUNE, /* 152 */
- OP_PRUNE_ARG, /* 153 same, but with argument */
- OP_SKIP, /* 154 */
- OP_SKIP_ARG, /* 155 same, but with argument */
- OP_THEN, /* 156 */
- OP_THEN_ARG, /* 157 same, but with argument */
- OP_COMMIT, /* 158 */
- OP_COMMIT_ARG, /* 159 same, but with argument */
+ OP_MARK, /* 150 always has an argument */
+ OP_PRUNE, /* 151 */
+ OP_PRUNE_ARG, /* 152 same, but with argument */
+ OP_SKIP, /* 153 */
+ OP_SKIP_ARG, /* 154 same, but with argument */
+ OP_THEN, /* 155 */
+ OP_THEN_ARG, /* 156 same, but with argument */
+ OP_COMMIT, /* 157 */
+ OP_COMMIT_ARG, /* 158 same, but with argument */
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
without the need for a special opcode. */
- OP_FAIL, /* 160 */
- OP_ACCEPT, /* 161 */
- OP_ASSERT_ACCEPT, /* 162 Used inside assertions */
- OP_CLOSE, /* 163 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 159 */
+ OP_ACCEPT, /* 160 */
+ OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
+ OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 164 */
+ OP_SKIPZERO, /* 163 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
- OP_DEFINE, /* 165 */
+ OP_DEFINE, /* 164 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1620,7 +1596,6 @@ some cases doesn't actually use these names at all). */
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
"Once", \
- "Script run", \
"Bra", "BraPos", "CBra", "CBraPos", \
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
@@ -1704,7 +1679,6 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
1+LINK_SIZE, /* ONCE */ \
- 1+LINK_SIZE, /* SCRIPT_RUN */ \
1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
@@ -1773,8 +1747,6 @@ typedef struct {
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */
- int16_t scriptx; /* script extension value */
- int16_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record;
/* UCD access macros */
@@ -1797,7 +1769,6 @@ typedef struct {
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
-#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
/* Header for serialized pcre2 codes. */
@@ -1855,8 +1826,6 @@ extern const uint8_t PRIV(utf8_table4)[];
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
-#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
-#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
#define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_)
#define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_)
#define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_)
@@ -1878,8 +1847,6 @@ extern const uint8_t PRIV(default_tables)[];
extern const uint32_t PRIV(hspace_list)[];
extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_caseless_sets)[];
-extern const uint32_t PRIV(ucd_digit_sets)[];
-extern const uint8_t PRIV(ucd_script_sets)[];
extern const ucd_record PRIV(ucd_records)[];
#if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[];
@@ -1927,7 +1894,6 @@ is available. */
#define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_)
#define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_)
#define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_)
-#define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_)
#define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_)
#define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_)
#define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_)
@@ -1942,7 +1908,7 @@ is available. */
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
const compile_block *);
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
- int *, uint32_t, uint32_t, BOOL, compile_block *);
+ int *, uint32_t, BOOL, compile_block *);
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
BOOL, int *);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
@@ -1954,7 +1920,6 @@ extern size_t _pcre2_jit_get_size(void *);
const char * _pcre2_jit_get_target(void);
extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *);
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
-extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL);
extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *);
diff --git a/dist2/src/pcre2_intmodedep.h b/dist2/src/pcre2_intmodedep.h
index bf3a2359..62626d0a 100644
--- a/dist2/src/pcre2_intmodedep.h
+++ b/dist2/src/pcre2_intmodedep.h
@@ -585,8 +585,6 @@ typedef struct pcre2_real_match_context {
#endif
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
- int (*substitute_callout)(pcre2_substitute_callout_block *, void *);
- void *substitute_callout_data;
PCRE2_SIZE offset_limit;
uint32_t heap_limit;
uint32_t match_limit;
@@ -658,8 +656,7 @@ typedef struct pcre2_real_match_data {
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
- uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
- uint8_t flags; /* Various flags */
+ uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
diff --git a/dist2/src/pcre2_jit_compile.c b/dist2/src/pcre2_jit_compile.c
index 1f21bfb6..32e985b7 100644
--- a/dist2/src/pcre2_jit_compile.c
+++ b/dist2/src/pcre2_jit_compile.c
@@ -477,22 +477,12 @@ typedef struct compiler_common {
BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
BOOL utf;
- BOOL invalid_utf;
BOOL use_ucp;
- /* Points to saving area for iref. */
- sljit_s32 iref_ptr;
jump_list *getucd;
- jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
jump_list *utfreadchar;
+ jump_list *utfreadchar16;
jump_list *utfreadtype8;
- jump_list *utfpeakcharback;
-#endif
-#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
- jump_list *utfreadchar_invalid;
- jump_list *utfreadnewline_invalid;
- jump_list *utfmoveback_invalid;
- jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
@@ -626,183 +616,7 @@ the start pointers when the end of the capturing group has not yet reached. */
#define READ_CHAR_MAX 0x7fffffff
-#define INVALID_UTF_CHAR -1
-#define UNASSIGNED_UTF_CHAR 888
-
-#if defined SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
-
-#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
- { \
- if (ptr[0] <= 0x7f) \
- c = *ptr++; \
- else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
- { \
- c = ptr[1] - 0x80; \
- \
- if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
- { \
- c |= (ptr[0] - 0xc0) << 6; \
- ptr += 2; \
- } \
- else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
- { \
- c = c << 6 | (ptr[2] - 0x80); \
- \
- if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
- { \
- c |= (ptr[0] - 0xe0) << 12; \
- ptr += 3; \
- \
- if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
- { \
- invalid_action; \
- } \
- } \
- else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
- { \
- c = c << 6 | (ptr[3] - 0x80); \
- \
- if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
- { \
- c |= (ptr[0] - 0xf0) << 18; \
- ptr += 4; \
- \
- if (c >= 0x110000 || c < 0x10000) \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- }
-
-#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
- { \
- if (ptr[-1] <= 0x7f) \
- c = *ptr--; \
- else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
- { \
- c = ptr[-1] - 0x80; \
- \
- if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
- { \
- c |= (ptr[-2] - 0xc0) << 6; \
- ptr -= 2; \
- } \
- else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
- { \
- c = c << 6 | (ptr[-2] - 0x80); \
- \
- if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
- { \
- c |= (ptr[-3] - 0xe0) << 12; \
- ptr -= 3; \
- \
- if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
- { \
- invalid_action; \
- } \
- } \
- else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
- { \
- c = c << 6 | (ptr[-3] - 0x80); \
- \
- if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
- { \
- c |= (ptr[-4] - 0xf0) << 18; \
- ptr -= 4; \
- \
- if (c >= 0x110000 || c < 0x10000) \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- } \
- else \
- { \
- invalid_action; \
- } \
- }
-
-#elif PCRE2_CODE_UNIT_WIDTH == 16
-
-#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
- { \
- if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
- c = *ptr++; \
- else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
- { \
- c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
- ptr += 2; \
- } \
- else \
- { \
- invalid_action; \
- } \
- }
-
-#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
- { \
- if (ptr[-1] < 0xd800 || ptr[-1] >= 0xe000) \
- c = *ptr--; \
- else if (ptr[-1] >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
- { \
- c = (((ptr[-2] - 0xd800) << 10) | (ptr[-1] - 0xdc00)) + 0x10000; \
- ptr -= 2; \
- } \
- else \
- { \
- invalid_action; \
- } \
- }
-
-
-#elif PCRE2_CODE_UNIT_WIDTH == 32
-
-#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
- { \
- if (ptr[0] < 0x110000) \
- c = *ptr++; \
- else \
- { \
- invalid_action; \
- } \
- }
-
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
-#endif /* SUPPORT_UNICODE */
+#define INVALID_UTF_CHAR 888
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
@@ -902,7 +716,6 @@ switch(*cc)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@@ -1056,16 +869,8 @@ while (cc < ccend)
cc += 1;
break;
- case OP_REFI:
-#ifdef SUPPORT_UNICODE
- if (common->iref_ptr == 0)
- {
- common->iref_ptr = common->ovector_start;
- common->ovector_start += 3 * sizeof(sljit_sw);
- }
-#endif /* SUPPORT_UNICODE */
- /* Fall through. */
case OP_REF:
+ case OP_REFI:
common->optimized_cbracket[GET2(cc, 1)] = 0;
cc += 1 + IMM2_SIZE;
break;
@@ -1570,7 +1375,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -2147,7 +1951,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -2371,14 +2174,14 @@ if (base_reg != TMP2)
else
{
status.saved_tmp_regs[1] = RETURN_ADDR;
- if (sljit_get_register_index(RETURN_ADDR) == -1)
+ if (sljit_get_register_index (RETURN_ADDR) == -1)
status.tmp_regs[1] = STR_PTR;
else
status.tmp_regs[1] = RETURN_ADDR;
}
status.saved_tmp_regs[2] = TMP3;
-if (sljit_get_register_index(TMP3) == -1)
+if (sljit_get_register_index (TMP3) == -1)
status.tmp_regs[2] = STR_END;
else
status.tmp_regs[2] = TMP3;
@@ -2471,7 +2274,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -3257,13 +3059,13 @@ return (0 << 8) | bit;
#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
{
- if (bit >= (1u << 10))
+ if (bit >= (1 << 10))
bit >>= 10;
else
return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
}
#endif /* SUPPORT_UNICODE */
-return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
+return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
@@ -3357,152 +3159,95 @@ else
JUMPHERE(jump);
}
-static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
+static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
-Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
+Does not check STR_END. TMP2 Destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+#endif
SLJIT_UNUSED_ARG(max);
-SLJIT_UNUSED_ARG(dst);
-SLJIT_UNUSED_ARG(dstw);
-SLJIT_UNUSED_ARG(backtracks);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-#ifdef SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
if (max < 128) return;
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
- OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
- if (backtracks && common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
+ add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(jump);
}
-#elif PCRE2_CODE_UNIT_WIDTH == 16
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
{
if (max < 0xd800) return;
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-
- if (common->invalid_utf)
- {
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
- OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
- if (backtracks && common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
- }
- else
- {
- /* TMP2 contains the high surrogate. */
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
- }
-
+ jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+ /* TMP2 contains the high surrogate. */
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
JUMPHERE(jump);
}
-#elif PCRE2_CODE_UNIT_WIDTH == 32
-if (common->invalid_utf)
+#endif
+}
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+
+static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
+{
+/* Tells whether the character codes below 128 are enough
+to determine a match. */
+const sljit_u8 value = nclass ? 0xff : 0;
+const sljit_u8 *end = bitset + 32;
+
+bitset += 16;
+do
{
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- }
+ if (*bitset++ != value)
+ return FALSE;
}
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
-#endif /* SUPPORT_UNICODE */
+while (bitset < end);
+return TRUE;
}
-static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
+static void read_char7_type(compiler_common *common, BOOL full_read)
{
-/* Reads one character back without moving STR_PTR. TMP2 must
-contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
+/* Reads the precise character type of a character into TMP1, if the character
+is less than 128. Otherwise it returns with zero. Does not check STR_END. The
+full_read argument tells whether characters above max are accepted or not. */
DEFINE_COMPILER;
-
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
-SLJIT_UNUSED_ARG(max);
-SLJIT_UNUSED_ARG(backtracks);
+SLJIT_ASSERT(common->utf);
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#ifdef SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
-if (common->utf)
- {
- if (max < 128) return;
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
- if (common->invalid_utf)
- {
- add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
- }
- else
- add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
- JUMPHERE(jump);
- }
-#elif PCRE2_CODE_UNIT_WIDTH == 16
-if (common->utf)
+if (full_read)
{
- if (max < 0xd800) return;
-
- if (common->invalid_utf)
- {
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
- add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
- }
- else
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
- /* TMP2 contains the low surrogate. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
- }
- JUMPHERE(jump);
+ jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+ JUMPHERE(jump);
}
-#elif PCRE2_CODE_UNIT_WIDTH == 32
- if (common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
-#endif /* SUPPORT_UNICODE */
}
-#define READ_CHAR_UPDATE_STR_PTR 0x1
-#define READ_CHAR_UTF8_NEWLINE 0x2
-#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
-#define READ_CHAR_VALID_UTF 0x4
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
-static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
- jump_list **backtracks, sljit_u32 options)
+static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
@@ -3515,41 +3260,24 @@ struct sljit_jump *jump;
struct sljit_jump *jump2;
#endif
+SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
-SLJIT_UNUSED_ARG(backtracks);
-SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#ifdef SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
- if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
-
- if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
- {
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
-
- if (options & READ_CHAR_UTF8_NEWLINE)
- add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
- else
- add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
-
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
- JUMPHERE(jump);
- return;
- }
+ if (max < 128 && !update_str_ptr) return;
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
if (min >= 0x10000)
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
- if (options & READ_CHAR_UPDATE_STR_PTR)
+ if (update_str_ptr)
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
@@ -3561,19 +3289,19 @@ if (common->utf)
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
- if (!(options & READ_CHAR_UPDATE_STR_PTR))
+ if (!update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
JUMPHERE(jump2);
- if (options & READ_CHAR_UPDATE_STR_PTR)
+ if (update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
}
else if (min >= 0x800 && max <= 0xffff)
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
- if (options & READ_CHAR_UPDATE_STR_PTR)
+ if (update_str_ptr)
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
@@ -3581,19 +3309,17 @@ if (common->utf)
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- if (!(options & READ_CHAR_UPDATE_STR_PTR))
+ if (!update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
JUMPHERE(jump2);
- if (options & READ_CHAR_UPDATE_STR_PTR)
+ if (update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
}
else if (max >= 0x800)
- {
- add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
- }
+ add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
else if (max < 128)
{
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
@@ -3602,7 +3328,7 @@ if (common->utf)
else
{
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (!(options & READ_CHAR_UPDATE_STR_PTR))
+ if (!update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
@@ -3610,141 +3336,51 @@ if (common->utf)
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- if (options & READ_CHAR_UPDATE_STR_PTR)
+ if (update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
}
JUMPHERE(jump);
}
-#elif PCRE2_CODE_UNIT_WIDTH == 16
+#endif
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
{
- if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
-
- if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
-
- if (options & READ_CHAR_UTF8_NEWLINE)
- add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
- else
- add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
-
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
- JUMPHERE(jump);
- return;
- }
-
if (max >= 0x10000)
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
+ jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
/* TMP2 contains the high surrogate. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
JUMPHERE(jump);
return;
}
+ if (max < 0xd800 && !update_str_ptr) return;
+
/* Skip low surrogate if necessary. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-
- if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0)
- {
- if (options & READ_CHAR_UPDATE_STR_PTR)
- OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
- if (options & READ_CHAR_UPDATE_STR_PTR)
- CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
- if (max >= 0xd800)
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
- }
- else
- {
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
- if (options & READ_CHAR_UPDATE_STR_PTR)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (max >= 0xd800)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
- JUMPHERE(jump);
- }
- }
-#elif PCRE2_CODE_UNIT_WIDTH == 32
-if (common->invalid_utf)
- {
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- }
- }
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
-#endif /* SUPPORT_UNICODE */
-}
-
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
-
-static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
-{
-/* Tells whether the character codes below 128 are enough
-to determine a match. */
-const sljit_u8 value = nclass ? 0xff : 0;
-const sljit_u8 *end = bitset + 32;
-
-bitset += 16;
-do
- {
- if (*bitset++ != value)
- return FALSE;
+ jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+ if (update_str_ptr)
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ if (max >= 0xd800)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
+ JUMPHERE(jump);
}
-while (bitset < end);
-return TRUE;
+#endif
}
-static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
+static SLJIT_INLINE void read_char(compiler_common *common)
{
-/* Reads the precise character type of a character into TMP1, if the character
-is less than 128. Otherwise it returns with zero. Does not check STR_END. The
-full_read argument tells whether characters above max are accepted or not. */
-DEFINE_COMPILER;
-struct sljit_jump *jump;
-
-SLJIT_ASSERT(common->utf);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
-/* All values > 127 are zero in ctypes. */
-OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
-
-if (negated)
- {
- jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
-
- if (common->invalid_utf)
- {
- add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- }
- else
- {
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- }
- JUMPHERE(jump);
- }
+read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
-
-static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
+static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
DEFINE_COMPILER;
@@ -3755,8 +3391,7 @@ struct sljit_jump *jump;
struct sljit_jump *jump2;
#endif
-SLJIT_UNUSED_ARG(backtracks);
-SLJIT_UNUSED_ARG(negated);
+SLJIT_UNUSED_ARG(update_str_ptr);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -3764,38 +3399,18 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
- /* The result of this read may be unused, but saves an "else" part. */
+ /* This can be an extra read in some situations, but hopefully
+ it is needed in most cases. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
-
- if (!negated)
+ jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
+ if (!update_str_ptr)
{
- if (common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
- if (common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));
-
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
- if (common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));
-
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- JUMPHERE(jump2);
- }
- else if (common->invalid_utf)
- {
- add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
-
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+ OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
@@ -3803,98 +3418,43 @@ if (common->utf)
}
else
add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
-
JUMPHERE(jump);
return;
}
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
-if (common->invalid_utf && negated)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */
-
#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
-#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
+#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
-#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
+#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
-if (common->utf && negated)
+if (common->utf && update_str_ptr)
{
/* Skip low surrogate if necessary. */
- if (!common->invalid_utf)
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
-
- if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0)
- {
- OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
- CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
- }
- else
- {
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPHERE(jump);
- }
- return;
- }
-
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
-
JUMPHERE(jump);
- return;
}
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
-static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
+static void skip_char_back(compiler_common *common)
{
-/* Goes one character back. TMP2 must contain the start of
-the subject buffer. Affects STR_PTR and TMP1. Does not modify
-STR_PTR for invalid character sequences. */
+/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
DEFINE_COMPILER;
-
-SLJIT_UNUSED_ARG(backtracks);
-SLJIT_UNUSED_ARG(must_be_valid);
-
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-struct sljit_jump *jump;
-#endif
-
-#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;
if (common->utf)
{
- if (!must_be_valid && common->invalid_utf)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
- add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
- JUMPHERE(jump);
- return;
- }
-
label = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -3907,45 +3467,16 @@ if (common->utf)
{
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
- if (!must_be_valid && common->invalid_utf)
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
- add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
- if (backtracks != NULL)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
- JUMPHERE(jump);
- return;
- }
-
/* Skip low surrogate if necessary. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- return;
- }
-#elif PCRE2_CODE_UNIT_WIDTH == 32
-if (common->invalid_utf && !must_be_valid)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
- if (backtracks != NULL)
- {
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- return;
- }
-
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
return;
}
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
-#endif /* SUPPORT_UNICODE */
+#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
@@ -3988,12 +3519,13 @@ else
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
-of the character (>= 0xc0). Return char value in TMP1. */
+of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
DEFINE_COMPILER;
struct sljit_jump *jump;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
@@ -4002,12 +3534,13 @@ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
-OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
@@ -4015,694 +3548,95 @@ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
-OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
-OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
-static void do_utfreadtype8(compiler_common *common)
+static void do_utfreadchar16(compiler_common *common)
{
-/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
+/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1. */
DEFINE_COMPILER;
struct sljit_jump *jump;
-struct sljit_jump *compare;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
-jump = JUMP(SLJIT_NOT_ZERO);
-/* Two byte sequence. */
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
-/* The upper 5 bits are known at this point. */
-compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
-OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
-OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(compare);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-/* We only have types for characters less than 256. */
-JUMPHERE(jump);
-OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfreadchar_invalid(compiler_common *common)
-{
-/* Slow decoding a UTF-8 character. TMP1 contains the first byte
-of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
-undefined for invalid characters. */
-DEFINE_COMPILER;
-sljit_s32 i;
-sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
-struct sljit_jump *jump;
-struct sljit_jump *buffer_end_close;
-struct sljit_label *three_byte_entry;
-struct sljit_label *exit_invalid_label;
-struct sljit_jump *exit_invalid[11];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
-
-/* Usually more than 3 characters remained in the subject buffer. */
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
-
-/* Not a valid start of a multi-byte sequence, no more bytes read. */
-exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
-
-buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
-OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
-exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
-jump = JUMP(SLJIT_NOT_ZERO);
-
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(jump);
-
-/* Three-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
- exit_invalid[2] = NULL;
- }
-else
- exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
-jump = JUMP(SLJIT_NOT_ZERO);
-
-three_byte_entry = LABEL();
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
- exit_invalid[3] = NULL;
- }
-else
- exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- exit_invalid[4] = NULL;
- }
-else
- exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(jump);
-
-/* Four-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
- exit_invalid[5] = NULL;
- }
-else
- exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
- exit_invalid[6] = NULL;
- }
-else
- exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
-
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(buffer_end_close);
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
-
-/* Two-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
-OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
-exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
+/* Searching for the first zero. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
-
+/* Two byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-/* Three-byte sequence. */
JUMPHERE(jump);
-exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
+/* This code runs only in 8 bit mode. No need to shift the value. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- exit_invalid[10] = NULL;
- }
-else
- exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-
-/* One will be substracted from STR_PTR later. */
+/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-
-/* Four byte sequences are not possible. */
-CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
-
-exit_invalid_label = LABEL();
-for (i = 0; i < 11; i++)
- sljit_set_label(exit_invalid[i], exit_invalid_label);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfreadnewline_invalid(compiler_common *common)
-{
-/* Slow decoding a UTF-8 character, specialized for newlines.
-TMP1 contains the first byte of the character (>= 0xc0). Return
-char value in TMP1. */
-DEFINE_COMPILER;
-struct sljit_label *loop;
-struct sljit_label *skip_start;
-struct sljit_label *three_byte_exit;
-struct sljit_jump *jump[5];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-if (common->nltype != NLTYPE_ANY)
- {
- SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
-
- /* All newlines are ascii, just skip intermediate octets. */
- jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- loop = LABEL();
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
- JUMPHERE(jump[0]);
-
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- return;
- }
-
-jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
-jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
-jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
-
-skip_start = LABEL();
-OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
-jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
-
-/* Skip intermediate octets. */
-loop = LABEL();
-jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
-CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
-
-JUMPHERE(jump[3]);
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
-three_byte_exit = LABEL();
-JUMPHERE(jump[0]);
-JUMPHERE(jump[4]);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-/* Two byte long newline: 0x85. */
-JUMPHERE(jump[1]);
-CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-/* Three byte long newlines: 0x2028 and 0x2029. */
-JUMPHERE(jump[2]);
-CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
-CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
-OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
-CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
-
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
-static void do_utfmoveback_invalid(compiler_common *common)
+static void do_utfreadtype8(compiler_common *common)
{
-/* Goes one character back. */
+/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
+of the character (>= 0xc0). Return value in TMP1. */
DEFINE_COMPILER;
-sljit_s32 i;
struct sljit_jump *jump;
-struct sljit_jump *buffer_start_close;
-struct sljit_label *exit_ok_label;
-struct sljit_label *exit_invalid_label;
-struct sljit_jump *exit_invalid[7];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
-exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
-
-/* Two-byte sequence. */
-buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
-jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-/* Three-byte sequence. */
-JUMPHERE(jump);
-exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
-jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-/* Four-byte sequence. */
-JUMPHERE(jump);
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
-exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
-exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
-
-exit_ok_label = LABEL();
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-/* Two-byte sequence. */
-JUMPHERE(buffer_start_close);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-
-exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
-CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
-
-/* Three-byte sequence. */
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
-exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
-CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
-
-/* Four-byte sequences are not possible. */
-
-exit_invalid_label = LABEL();
-sljit_set_label(exit_invalid[5], exit_invalid_label);
-sljit_set_label(exit_invalid[6], exit_invalid_label);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(exit_invalid[4]);
-/* -2 + 4 = 2 */
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-
-exit_invalid_label = LABEL();
-for (i = 0; i < 4; i++)
- sljit_set_label(exit_invalid[i], exit_invalid_label);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfpeakcharback(compiler_common *common)
-{
-/* Peak a character back. */
-DEFINE_COMPILER;
-struct sljit_jump *jump[2];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
-jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
-jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-JUMPHERE(jump[1]);
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-JUMPHERE(jump[0]);
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
-OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfpeakcharback_invalid(compiler_common *common)
-{
-/* Peak a character back. */
-DEFINE_COMPILER;
-sljit_s32 i;
-sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
-struct sljit_jump *jump[2];
-struct sljit_label *two_byte_entry;
-struct sljit_label *three_byte_entry;
-struct sljit_label *exit_invalid_label;
-struct sljit_jump *exit_invalid[8];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
-exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
-jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
-
-/* Two-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
-jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
-
-two_byte_entry = LABEL();
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
-/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(jump[1]);
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
-exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-/* Three-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
-jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
-
-three_byte_entry = LABEL();
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
- exit_invalid[2] = NULL;
- }
-else
- exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
-
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- exit_invalid[3] = NULL;
- }
-else
- exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
-
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(jump[1]);
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
-exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-/* Four-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
-/* ADD is used instead of OR because of the SUB 0x10000 above. */
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-
-if (has_cmov)
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
- exit_invalid[5] = NULL;
- }
-else
- exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
-
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(jump[0]);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
-jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
-
-/* Two-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
-CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
-
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
-exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-/* Three-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
-CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(jump[0]);
-exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
-
-/* Two-byte sequence. */
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
-CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
-
-exit_invalid_label = LABEL();
-for (i = 0; i < 8; i++)
- sljit_set_label(exit_invalid[i], exit_invalid_label);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
-
-#if PCRE2_CODE_UNIT_WIDTH == 16
-
-static void do_utfreadchar_invalid(compiler_common *common)
-{
-/* Slow decoding a UTF-16 character. TMP1 contains the first half
-of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
-undefined for invalid characters. */
-DEFINE_COMPILER;
-struct sljit_jump *exit_invalid[3];
+struct sljit_jump *compare;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-/* TMP2 contains the high surrogate. */
-exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
-exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
+jump = JUMP(SLJIT_NOT_ZERO);
+/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
-exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
-
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(exit_invalid[0]);
-JUMPHERE(exit_invalid[1]);
-JUMPHERE(exit_invalid[2]);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfreadnewline_invalid(compiler_common *common)
-{
-/* Slow decoding a UTF-16 character, specialized for newlines.
-TMP1 contains the first half of the character (>= 0xd800). Return
-char value in TMP1. */
-
-DEFINE_COMPILER;
-struct sljit_jump *exit_invalid[2];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-/* TMP2 contains the high surrogate. */
-exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
-exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
-
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
-OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(exit_invalid[0]);
-JUMPHERE(exit_invalid[1]);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfmoveback_invalid(compiler_common *common)
-{
-/* Goes one character back. */
-DEFINE_COMPILER;
-struct sljit_jump *exit_invalid[3];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
-exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
-
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
-
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
+/* The upper 5 bits are known at this point. */
+compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-JUMPHERE(exit_invalid[0]);
-JUMPHERE(exit_invalid[1]);
-JUMPHERE(exit_invalid[2]);
-
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_utfpeakcharback_invalid(compiler_common *common)
-{
-/* Peak a character back. */
-DEFINE_COMPILER;
-struct sljit_jump *jump;
-struct sljit_jump *exit_invalid[3];
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
-exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
-exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
-OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
-exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+/* We only have types for characters less than 256. */
JUMPHERE(jump);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-
-JUMPHERE(exit_invalid[0]);
-JUMPHERE(exit_invalid[1]);
-JUMPHERE(exit_invalid[2]);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
+OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
-#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
#define UCD_BLOCK_MASK 127
@@ -4719,52 +3653,12 @@ struct sljit_jump *jump;
#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
-const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
-SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
-SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
-#endif
-
-SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
-
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-
-#if PCRE2_CODE_UNIT_WIDTH == 32
-if (!common->utf)
- {
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
- JUMPHERE(jump);
- }
-#endif
-
-OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
-OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
-OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
-OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
-
-static void do_getucdtype(compiler_common *common)
-{
-/* Search the UCD record for the character comes in TMP1.
-Returns chartype in TMP1 and UCD offset in TMP2. */
-DEFINE_COMPILER;
-#if PCRE2_CODE_UNIT_WIDTH == 32
-struct sljit_jump *jump;
-#endif
-
-#if defined SLJIT_DEBUG && SLJIT_DEBUG
-/* dummy_ucd_record */
-const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
-SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
+const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
+SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif
-SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
+SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
@@ -4772,7 +3666,7 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
if (!common->utf)
{
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
JUMPHERE(jump);
}
#endif
@@ -4785,19 +3679,8 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
-
-// PH hacking
-//fprintf(stderr, "~~A\n");
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
-
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
-
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
-
-// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -4812,9 +3695,8 @@ struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-struct sljit_label *loop;
-struct sljit_jump *jump;
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+struct sljit_jump *singlechar;
+#endif
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
@@ -4851,7 +3733,7 @@ if ((overall_options & PCRE2_FIRSTLINE) != 0)
mainloop = LABEL();
/* Continual stores does not cause data dependency. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
- read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
check_newlinechar(common, common->nltype, &newline, TRUE);
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
JUMPHERE(end);
@@ -4871,9 +3753,11 @@ else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
-#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
-#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
+#if PCRE2_CODE_UNIT_WIDTH == 16
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
+#endif
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
@@ -4897,7 +3781,7 @@ if (newlinecheck)
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
-#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
+#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
end2 = JUMP(SLJIT_JUMP);
}
@@ -4905,9 +3789,9 @@ if (newlinecheck)
mainloop = LABEL();
/* Increasing the STR_PTR here requires one less jump in the most common case. */
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-if (common->utf && !common->invalid_utf) readuchar = TRUE;
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+#ifdef SUPPORT_UNICODE
+if (common->utf) readuchar = TRUE;
+#endif
if (newlinecheck) readuchar = TRUE;
if (readuchar)
@@ -4919,55 +3803,23 @@ if (newlinecheck)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
-if (common->invalid_utf)
- {
- /* Skip continuation code units. */
- loop = LABEL();
- jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPHERE(jump);
- }
-else if (common->utf)
+if (common->utf)
{
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+ singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- JUMPHERE(jump);
+ JUMPHERE(singlechar);
}
#elif PCRE2_CODE_UNIT_WIDTH == 16
-if (common->invalid_utf)
- {
- /* Skip continuation code units. */
- loop = LABEL();
- jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPHERE(jump);
- }
-else if (common->utf)
+if (common->utf)
{
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
-
- if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
- CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
- }
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- }
+ singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+ JUMPHERE(singlechar);
}
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
@@ -5453,16 +4305,16 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
-sljit_u32 value = chr;
+sljit_s32 value = (sljit_s32)chr;
#if PCRE2_CODE_UNIT_WIDTH == 8
#define SSE2_COMPARE_TYPE_INDEX 0
-return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
+return (value << 24) | (value << 16) | (value << 8) | value;
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SSE2_COMPARE_TYPE_INDEX 1
-return (sljit_s32)((value << 16) | value);
+return (value << 16) | value;
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SSE2_COMPARE_TYPE_INDEX 2
-return (sljit_s32)(value);
+return value;
#else
#error "Unsupported unit width"
#endif
@@ -6268,7 +5120,7 @@ for (i = 0; i < max; i++)
}
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
-if (sljit_has_cpu_feature(SLJIT_HAS_SSE2) && check_fast_forward_char_pair_sse2(common, chars, max))
+if (check_fast_forward_char_pair_sse2(common, chars, max))
return TRUE;
#endif
@@ -6504,15 +5356,14 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
}
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-/* Example: match /^/ to \r\n from offset 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
-move_back(common, NULL, FALSE);
+skip_char_back(common);
loop = LABEL();
common->ff_newline_shortcut = loop;
-read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
+read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
@@ -6693,7 +5544,7 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
-if (sljit_get_register_index(TMP3) < 0)
+if (sljit_get_register_index (TMP3) < 0)
{
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
@@ -6718,7 +5569,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
-if (sljit_get_register_index(TMP3) < 0)
+if (sljit_get_register_index (TMP3) < 0)
{
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
@@ -6737,29 +5588,21 @@ static void check_wordboundary(compiler_common *common)
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
-jump_list *invalid_utf = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
-#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
+#endif
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
-/* Get type of the previous char, and put it to TMP3. */
+/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
-OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
-skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
-
-if (common->mode == PCRE2_JIT_COMPLETE)
- peek_char_back(common, READ_CHAR_MAX, &invalid_utf);
-else
- {
- move_back(common, &invalid_utf, FALSE);
- check_start_used_ptr(common);
- /* No need precise read since match fails anyway. */
- read_char(common, 0, READ_CHAR_MAX, &invalid_utf, READ_CHAR_UPDATE_STR_PTR);
- }
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
+skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
+skip_char_back(common);
+check_start_used_ptr(common);
+read_char(common);
/* Testing char type. */
#ifdef SUPPORT_UNICODE
@@ -6767,7 +5610,7 @@ if (common->use_ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
- add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
+ add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
@@ -6775,22 +5618,23 @@ if (common->use_ucp)
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
- OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
}
else
-#endif /* SUPPORT_UNICODE */
+#endif
{
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
- /* Here TMP3 has already been zeroed. */
+ /* Here LOCALS1 has already been zeroed. */
jump = NULL;
if (common->utf)
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
- OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
@@ -6802,7 +5646,7 @@ JUMPHERE(skipread);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
-peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf);
+peek_char(common, READ_CHAR_MAX);
/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE
@@ -6810,7 +5654,7 @@ if (common->use_ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
- add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
+ add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
@@ -6820,7 +5664,7 @@ if (common->use_ucp)
JUMPHERE(jump);
}
else
-#endif /* SUPPORT_UNICODE */
+#endif
{
#if PCRE2_CODE_UNIT_WIDTH != 8
/* TMP2 may be destroyed by peek_char. */
@@ -6844,22 +5688,8 @@ else
}
set_jumps(skipread_list, LABEL());
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
-OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
-sljit_emit_fast_return(compiler, TMP1, 0);
-
-#ifdef SUPPORT_UNICODE
-if (common->invalid_utf)
- {
- SLJIT_ASSERT(invalid_utf != NULL);
-
- set_jumps(invalid_utf, LABEL());
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
- sljit_emit_fast_return(compiler, TMP1, 0);
- return;
- }
-#endif /* SUPPORT_UNICODE */
+OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
@@ -7026,6 +5856,9 @@ int i, j, k, len, c;
if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
return FALSE;
+if (invert)
+ nclass = !nclass;
+
len = 0;
for (i = 0; i < 32; i++)
@@ -7107,9 +5940,6 @@ if (j != 0)
}
}
-if (invert)
- nclass = !nclass;
-
type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
@@ -7395,6 +6225,37 @@ OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);
}
+#if defined SUPPORT_UNICODE
+
+static PCRE2_SPTR SLJIT_FUNC do_utf_caselesscmp(PCRE2_SPTR src1, PCRE2_SPTR src2, PCRE2_SPTR end1, PCRE2_SPTR end2)
+{
+/* This function would be ineffective to do in JIT level. */
+sljit_u32 c1, c2;
+const ucd_record *ur;
+const sljit_u32 *pp;
+
+while (src1 < end1)
+ {
+ if (src2 >= end2)
+ return (PCRE2_SPTR)1;
+ GETCHARINC(c1, src1);
+ GETCHARINC(c2, src2);
+ ur = GET_UCD(c2);
+ if (c1 != c2 && c1 != c2 + ur->other_case)
+ {
+ pp = PRIV(ucd_caseless_sets) + ur->caseset;
+ for (;;)
+ {
+ if (c1 < *pp) return NULL;
+ if (c1 == *pp++) break;
+ }
+ }
+ }
+return src2;
+}
+
+#endif /* SUPPORT_UNICODE */
+
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
compare_context *context, jump_list **backtracks)
{
@@ -7436,7 +6297,7 @@ if (context->sourcereg == -1)
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else
#endif
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
if (context->length >= 4)
@@ -7583,7 +6444,7 @@ PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
-#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
+#endif
#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
@@ -7591,7 +6452,7 @@ BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
-#endif /* SUPPORT_UNICODE */
+#endif
/* Scanning the necessary info. */
cc++;
@@ -7615,7 +6476,7 @@ while (*cc != XCL_END)
if (c < min) min = c;
#ifdef SUPPORT_UNICODE
needschar = TRUE;
-#endif /* SUPPORT_UNICODE */
+#endif
}
else if (*cc == XCL_RANGE)
{
@@ -7626,7 +6487,7 @@ while (*cc != XCL_END)
if (c > max) max = c;
#ifdef SUPPORT_UNICODE
needschar = TRUE;
-#endif /* SUPPORT_UNICODE */
+#endif
}
#ifdef SUPPORT_UNICODE
else
@@ -7694,16 +6555,13 @@ while (*cc != XCL_END)
}
cc += 2;
}
-#endif /* SUPPORT_UNICODE */
+#endif
}
SLJIT_ASSERT(compares > 0);
/* We are not necessary in utf mode even in 8 bit mode. */
cc = ccbegin;
-if ((cc[-1] & XCL_NOT) != 0)
- read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
-else
- read_char(common, min, max, NULL, 0);
+read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
if ((cc[-1] & XCL_HASPROP) == 0)
{
@@ -7736,13 +6594,13 @@ else if ((cc[-1] & XCL_MAP) != 0)
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
charsaved = TRUE;
-#endif /* SUPPORT_UNICODE */
+#endif
if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
{
#if PCRE2_CODE_UNIT_WIDTH == 8
jump = NULL;
if (common->utf)
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+#endif
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
@@ -7754,7 +6612,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+#endif
JUMPHERE(jump);
}
@@ -7772,10 +6630,10 @@ if (needstype || needsscript)
if (!common->utf)
{
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
JUMPHERE(jump);
}
-#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
+#endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
@@ -7789,18 +6647,8 @@ if (needstype || needsscript)
/* Before anything else, we deal with scripts. */
if (needsscript)
{
-// PH hacking
-//fprintf(stderr, "~~B\n");
-
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
-
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
-
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
-
- // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
ccbegin = cc;
@@ -7838,49 +6686,33 @@ if (needstype || needsscript)
}
if (needschar)
+ {
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+ }
if (needstype)
{
if (!needschar)
{
-// PH hacking
-//fprintf(stderr, "~~C\n");
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0);
-
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
-
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
-
-// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
}
else
{
-// PH hacking
-//fprintf(stderr, "~~D\n");
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
-
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
-
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
-
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
typereg = RETURN_ADDR;
}
}
}
-#endif /* SUPPORT_UNICODE */
+#endif
/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
-#endif /* SUPPORT_UNICODE */
+#endif
while (*cc != XCL_END)
{
@@ -8147,7 +6979,7 @@ while (*cc != XCL_END)
}
cc += 2;
}
-#endif /* SUPPORT_UNICODE */
+#endif
if (jump != NULL)
add_jump(compiler, compares > 0 ? list : backtracks, jump);
@@ -8188,15 +7020,6 @@ switch(type)
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
-#ifdef SUPPORT_UNICODE
- if (common->invalid_utf)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
- add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS));
- add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
- return cc;
- }
-#endif /* SUPPORT_UNICODE */
sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
@@ -8255,13 +7078,13 @@ switch(type)
}
else
{
- OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
- read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
- OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
}
JUMPHERE(jump[2]);
JUMPHERE(jump[3]);
@@ -8320,7 +7143,7 @@ switch(type)
}
else
{
- peek_char(common, common->nlmax, TMP3, 0, NULL);
+ peek_char(common, common->nlmax);
check_newlinechar(common, common->nltype, backtracks, FALSE);
}
JUMPHERE(jump[0]);
@@ -8335,10 +7158,10 @@ switch(type)
return cc;
case OP_CIRCM:
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+ jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
+ OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
jump[0] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[1]);
@@ -8348,8 +7171,8 @@ switch(type)
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
- OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
+ OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
@@ -8357,7 +7180,8 @@ switch(type)
}
else
{
- peek_char_back(common, common->nlmax, backtracks);
+ skip_char_back(common);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
check_newlinechar(common, common->nltype, backtracks, FALSE);
}
JUMPHERE(jump[0]);
@@ -8371,12 +7195,12 @@ switch(type)
#ifdef SUPPORT_UNICODE
if (common->utf)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
label = LABEL();
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
- move_back(common, backtracks, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
+ skip_char_back(common);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
else
@@ -8401,28 +7225,21 @@ static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR start_subject = args->begin;
PCRE2_SPTR end_subject = args->end;
-int lgb, rgb, ricount;
-PCRE2_SPTR prevcc, startcc, bptr;
-BOOL first = TRUE;
+int lgb, rgb, len, ricount;
+PCRE2_SPTR prevcc, bptr;
uint32_t c;
prevcc = cc;
-startcc = NULL;
-do
+GETCHARINC(c, cc);
+lgb = UCD_GRAPHBREAK(c);
+
+while (cc < end_subject)
{
- GETCHARINC(c, cc);
+ len = 1;
+ GETCHARLEN(c, cc, len);
rgb = UCD_GRAPHBREAK(c);
- if (first)
- {
- lgb = rgb;
- startcc = cc;
- first = FALSE;
- continue;
- }
-
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
- break;
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
@@ -8439,8 +7256,7 @@ do
BACKCHAR(bptr);
GETCHAR(c, bptr);
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
- break;
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
ricount++;
}
@@ -8455,80 +7271,14 @@ do
lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
- prevcc = startcc;
- startcc = cc;
- }
-while (cc < end_subject);
-
-return startcc;
-}
-
-static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
-{
-PCRE2_SPTR start_subject = args->begin;
-PCRE2_SPTR end_subject = args->end;
-int lgb, rgb, ricount;
-PCRE2_SPTR prevcc, startcc, bptr;
-BOOL first = TRUE;
-uint32_t c;
-
-prevcc = cc;
-startcc = NULL;
-do
- {
- GETCHARINC_INVALID(c, cc, end_subject, break);
- rgb = UCD_GRAPHBREAK(c);
-
- if (first)
- {
- lgb = rgb;
- startcc = cc;
- first = FALSE;
- continue;
- }
-
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
- break;
-
- /* Not breaking between Regional Indicators is allowed only if there
- are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
- {
- ricount = 0;
- bptr = prevcc;
-
- /* bptr is pointing to the left-hand character */
- while (bptr > start_subject)
- {
- GETCHARBACK_INVALID(c, bptr, start_subject, break);
-
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
- break;
-
- ricount++;
- }
-
- if ((ricount & 1) != 0)
- break; /* Grapheme break required */
- }
-
- /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
- allows any number of them before a following Extended_Pictographic. */
-
- if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
- lgb != ucp_gbExtended_Pictographic)
- lgb = rgb;
-
- prevcc = startcc;
- startcc = cc;
+ prevcc = cc;
+ cc += len;
}
-while (cc < end_subject);
-return startcc;
+return cc;
}
-#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+#endif
static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
{
@@ -8539,23 +7289,14 @@ PCRE2_SPTR bptr;
uint32_t c;
GETCHARINC(c, cc);
-#if PCRE2_CODE_UNIT_WIDTH == 32
-if (c >= 0x110000)
- return NULL;
-#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
lgb = UCD_GRAPHBREAK(c);
while (cc < end_subject)
{
c = *cc;
-#if PCRE2_CODE_UNIT_WIDTH == 32
- if (c >= 0x110000)
- break;
-#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
rgb = UCD_GRAPHBREAK(c);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
- break;
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
@@ -8570,18 +7311,13 @@ while (cc < end_subject)
{
bptr--;
c = *bptr;
-#if PCRE2_CODE_UNIT_WIDTH == 32
- if (c >= 0x110000)
- break;
-#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
ricount++;
}
- if ((ricount & 1) != 0)
- break; /* Grapheme break required */
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
}
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
@@ -8597,7 +7333,7 @@ while (cc < end_subject)
return cc;
}
-#endif /* SUPPORT_UNICODE */
+#endif
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
@@ -8620,10 +7356,10 @@ switch(type)
detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
- read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
+ read_char7_type(common, type == OP_NOT_DIGIT);
else
#endif
- read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
+ read_char8_type(common, type == OP_NOT_DIGIT);
/* Flip the starting bit in the negative case. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
@@ -8635,10 +7371,10 @@ switch(type)
detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
- read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
+ read_char7_type(common, type == OP_NOT_WHITESPACE);
else
#endif
- read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
+ read_char8_type(common, type == OP_NOT_WHITESPACE);
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -8649,10 +7385,10 @@ switch(type)
detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
- read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
+ read_char7_type(common, type == OP_NOT_WORDCHAR);
else
#endif
- read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
+ read_char8_type(common, type == OP_NOT_WORDCHAR);
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -8660,7 +7396,7 @@ switch(type)
case OP_ANY:
if (check_str_ptr)
detect_partial_match(common, backtracks);
- read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
@@ -8682,18 +7418,12 @@ switch(type)
case OP_ALLANY:
if (check_str_ptr)
detect_partial_match(common, backtracks);
-#ifdef SUPPORT_UNICODE
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
- if (common->invalid_utf)
- {
- read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
- return cc;
- }
-
-#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
#if PCRE2_CODE_UNIT_WIDTH == 8
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
@@ -8705,12 +7435,12 @@ switch(type)
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+#endif
JUMPHERE(jump[0]);
- return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
+ return cc;
}
-#endif /* SUPPORT_UNICODE */
+#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
return cc;
@@ -8737,7 +7467,7 @@ switch(type)
case OP_ANYNL:
if (check_str_ptr)
detect_partial_match(common, backtracks);
- read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
+ read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* We don't need to handle soft partial matching case. */
end_list = NULL;
@@ -8760,12 +7490,7 @@ switch(type)
case OP_HSPACE:
if (check_str_ptr)
detect_partial_match(common, backtracks);
-
- if (type == OP_NOT_HSPACE)
- read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
- else
- read_char(common, 0x9, 0x3000, NULL, 0);
-
+ read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
@@ -8775,12 +7500,7 @@ switch(type)
case OP_VSPACE:
if (check_str_ptr)
detect_partial_match(common, backtracks);
-
- if (type == OP_NOT_VSPACE)
- read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
- else
- read_char(common, 0xa, 0x2029, NULL, 0);
-
+ read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
@@ -8796,12 +7516,9 @@ switch(type)
#if PCRE2_CODE_UNIT_WIDTH != 32
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
- common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
- if (common->invalid_utf)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+ common->utf ? SLJIT_FUNC_OFFSET(do_extuni_utf) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
@@ -8822,15 +7539,11 @@ switch(type)
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
-
- if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
- detect_partial_match(common, backtracks);
-
- if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
+ if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
+ && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
{
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
context.length = IN_UCHARS(length);
context.sourcereg = -1;
@@ -8840,6 +7553,8 @@ switch(type)
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
}
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
if (common->utf)
{
@@ -8849,28 +7564,24 @@ switch(type)
#endif
c = *cc;
- SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
-
- if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-
- oc = char_othercase(common, c);
- read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
-
- SLJIT_ASSERT(!is_powerof2(c ^ oc));
-
- if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+ if (type == OP_CHAR || !char_has_othercase(common, cc))
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
- CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
+ read_char_range(common, c, c, FALSE);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+ return cc + length;
}
- else
+ oc = char_othercase(common, c);
+ read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
+ bit = c ^ oc;
+ if (is_powerof2(bit))
{
- jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
- JUMPHERE(jump[0]);
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
+ return cc + length;
}
+ jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+ JUMPHERE(jump[0]);
return cc + length;
case OP_NOT:
@@ -8884,7 +7595,7 @@ switch(type)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
c = *cc;
- if (c < 128 && !common->invalid_utf)
+ if (c < 128)
{
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
if (type == OP_NOT || !char_has_othercase(common, cc))
@@ -8915,13 +7626,13 @@ switch(type)
if (type == OP_NOT || !char_has_othercase(common, cc))
{
- read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
+ read_char_range(common, c, c, TRUE);
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
}
else
{
oc = char_othercase(common, c);
- read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
+ read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
bit = c ^ oc;
if (is_powerof2(bit))
{
@@ -8943,15 +7654,9 @@ switch(type)
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
- if (type == OP_NCLASS)
- read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
- else
- read_char(common, 0, bit, NULL, 0);
+ read_char_range(common, 0, bit, type == OP_NCLASS);
#else
- if (type == OP_NCLASS)
- read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
- else
- read_char(common, 0, 255, NULL, 0);
+ read_char_range(common, 0, 255, type == OP_NCLASS);
#endif
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
@@ -9138,14 +7843,6 @@ int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
-#if defined SUPPORT_UNICODE
-struct sljit_label *loop;
-struct sljit_label *caseless_loop;
-jump_list *no_match = NULL;
-int source_reg = COUNT_MATCH;
-int source_end_reg = ARGUMENTS;
-int char1_reg = STACK_LIMIT;
-#endif /* SUPPORT_UNICODE */
if (ref)
{
@@ -9161,98 +7858,34 @@ else
#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
{
- SLJIT_ASSERT(common->iref_ptr != 0);
-
+ SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
if (ref)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
else
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
-
- if (withchecks && emptyfail)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
-
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
-
- OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
- OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
-
- loop = LABEL();
- jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
- partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- /* Read original character. It must be a valid UTF character. */
- OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
-
- read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
-
- OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
- OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
-
- /* Read second character. */
- read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
-
- CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
-
-// PH hacking
-//fprintf(stderr, "~~E\n");
-
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
-
- add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
-
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
-
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
-
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
-
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
-
- OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
- CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
-
- add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
+ if (withchecks)
+ jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
+ /* No free saved registers so save data on stack. */
- caseless_loop = LABEL();
- OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
- OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
- JUMPTO(SLJIT_EQUAL, loop);
- JUMPTO(SLJIT_LESS, caseless_loop);
+ OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
- set_jumps(no_match, LABEL());
if (common->mode == PCRE2_JIT_COMPLETE)
- JUMPHERE(partial);
-
- OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
- OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
- OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
-
- if (common->mode != PCRE2_JIT_COMPLETE)
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
+ else
{
- JUMPHERE(partial);
- OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
- OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
- OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
+ OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
+
+ add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
+ nopartial = JUMP(SLJIT_NOT_EQUAL);
+ OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
check_partial(common, FALSE);
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ JUMPHERE(nopartial);
}
-
- JUMPHERE(jump);
- OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
- OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
- OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
- return;
}
else
#endif /* SUPPORT_UNICODE */
@@ -10229,42 +8862,6 @@ if (common->optimized_cbracket[offset >> 1] == 0)
return stacksize;
}
-static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
-{
- if (PRIV(script_run)(ptr, endptr, FALSE))
- return endptr;
- return NULL;
-}
-
-#ifdef SUPPORT_UNICODE
-
-static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
-{
- if (PRIV(script_run)(ptr, endptr, TRUE))
- return endptr;
- return NULL;
-}
-
-#endif /* SUPPORT_UNICODE */
-
-static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
-{
-DEFINE_COMPILER;
-
-SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
-#ifdef SUPPORT_UNICODE
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
- common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
-#else
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
-#endif
-
-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
-add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
-}
-
/*
Handling bracketed expressions is probably the most complex part.
@@ -10400,7 +8997,7 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA)
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
matchingpath += IMM2_SIZE;
}
-else if (opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
+else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
{
/* Other brackets simply allocate the next entry. */
private_data_ptr = PRIVATE_DATA(ccbegin);
@@ -10439,32 +9036,35 @@ if (bra == OP_BRAMINZERO)
free_stack(common, 1);
braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
}
- else if (opcode == OP_ONCE || opcode >= OP_SBRA)
+ else
{
- jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- /* Nothing stored during the first run. */
- skip = JUMP(SLJIT_JUMP);
- JUMPHERE(jump);
- /* Checking zero-length iteration. */
- if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
+ if (opcode == OP_ONCE || opcode >= OP_SBRA)
{
- /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
- braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ /* Nothing stored during the first run. */
+ skip = JUMP(SLJIT_JUMP);
+ JUMPHERE(jump);
+ /* Checking zero-length iteration. */
+ if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
+ {
+ /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
+ braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ }
+ else
+ {
+ /* Except when the whole stack frame must be saved. */
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
+ }
+ JUMPHERE(skip);
}
else
{
- /* Except when the whole stack frame must be saved. */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
+ jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ JUMPHERE(jump);
}
- JUMPHERE(skip);
- }
- else
- {
- jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- JUMPHERE(jump);
}
}
@@ -10481,7 +9081,7 @@ if (ket == OP_KETRMIN)
if (ket == OP_KETRMAX)
{
rmax_label = LABEL();
- if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
+ if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
}
@@ -10585,7 +9185,7 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
}
}
-else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
+else if (opcode == OP_SBRA || opcode == OP_SCOND)
{
/* Saving the previous value. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
@@ -10714,9 +9314,6 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
if (opcode == OP_ONCE)
match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
-if (opcode == OP_SCRIPT_RUN)
- match_script_run_common(common, private_data_ptr, backtrack);
-
stacksize = 0;
if (repeat_type == OP_MINUPTO)
{
@@ -10786,15 +9383,13 @@ if (ket == OP_KETRMAX)
if (opcode != OP_ONCE)
free_stack(common, 1);
}
- else if (opcode < OP_BRA || opcode >= OP_SBRA)
+ else if (opcode == OP_ONCE || opcode >= OP_SBRA)
{
if (has_alternatives)
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
-
/* Checking zero-length iteration. */
if (opcode != OP_ONCE)
{
- /* This case includes opcodes such as OP_SCRIPT_RUN. */
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
/* Drop STR_PTR for greedy plus quantifier. */
if (bra != OP_BRAZERO)
@@ -10861,7 +9456,7 @@ if (opcode == OP_ONCE)
/* We temporarily encode the needs_control_head in the lowest bit.
Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
the same value for small signed numbers (including negative numbers). */
- BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
+ BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
}
return cc + repeat_length;
}
@@ -11356,7 +9951,7 @@ if (exact > 1)
#ifdef SUPPORT_UNICODE
&& !common->utf
#endif
- && type != OP_ANYNL && type != OP_EXTUNI)
+ )
{
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
@@ -12039,7 +10634,6 @@ while (cc < ccend)
break;
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -12193,14 +10787,14 @@ switch(opcode)
if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- move_back(common, NULL, TRUE);
+ skip_char_back(common);
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
}
else
{
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
- move_back(common, NULL, TRUE);
+ skip_char_back(common);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
}
@@ -12646,9 +11240,6 @@ if (has_alternatives)
compile_matchingpath(common, ccprev, cc, current);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
-
- if (opcode == OP_SCRIPT_RUN)
- match_script_run_common(common, private_data_ptr, current);
}
/* Instructions after the current alternative is successfully matched. */
@@ -12777,7 +11368,7 @@ if (offset != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
}
-else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
+else if (opcode == OP_SBRA || opcode == OP_SCOND)
{
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
@@ -13126,7 +11717,6 @@ while (current)
break;
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -13426,9 +12016,6 @@ sljit_emit_fast_return(compiler, TMP2, 0);
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS
-#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
- (PCRE2_JIT_INVALID_UTF)
-
static int jit_compile(pcre2_code *code, sljit_u32 mode)
{
pcre2_real_code *re = (pcre2_real_code *)code;
@@ -13465,11 +12052,6 @@ common->re = re;
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
-#ifdef SUPPORT_UNICODE
-common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
-#endif /* SUPPORT_UNICODE */
-mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
-
common->start = rootbacktrack.cc;
common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
@@ -13484,7 +12066,6 @@ switch(re->newline_convention)
case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
- case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
default: return PCRE2_ERROR_INTERNAL;
}
common->nlmax = READ_CHAR_MAX;
@@ -13536,8 +12117,6 @@ if (common->utf)
common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
}
-else
- common->invalid_utf = FALSE;
#endif /* SUPPORT_UNICODE */
ccend = bracketend(common->start);
@@ -13978,49 +12557,22 @@ if (common->utfreadchar != NULL)
set_jumps(common->utfreadchar, LABEL());
do_utfreadchar(common);
}
+if (common->utfreadchar16 != NULL)
+ {
+ set_jumps(common->utfreadchar16, LABEL());
+ do_utfreadchar16(common);
+ }
if (common->utfreadtype8 != NULL)
{
set_jumps(common->utfreadtype8, LABEL());
do_utfreadtype8(common);
}
-if (common->utfpeakcharback != NULL)
- {
- set_jumps(common->utfpeakcharback, LABEL());
- do_utfpeakcharback(common);
- }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
-#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
-if (common->utfreadchar_invalid != NULL)
- {
- set_jumps(common->utfreadchar_invalid, LABEL());
- do_utfreadchar_invalid(common);
- }
-if (common->utfreadnewline_invalid != NULL)
- {
- set_jumps(common->utfreadnewline_invalid, LABEL());
- do_utfreadnewline_invalid(common);
- }
-if (common->utfmoveback_invalid)
- {
- set_jumps(common->utfmoveback_invalid, LABEL());
- do_utfmoveback_invalid(common);
- }
-if (common->utfpeakcharback_invalid)
- {
- set_jumps(common->utfpeakcharback_invalid, LABEL());
- do_utfpeakcharback_invalid(common);
- }
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
if (common->getucd != NULL)
{
set_jumps(common->getucd, LABEL());
do_getucd(common);
}
-if (common->getucdtype != NULL)
- {
- set_jumps(common->getucdtype, LABEL());
- do_getucdtype(common);
- }
#endif /* SUPPORT_UNICODE */
SLJIT_FREE(common->optimized_cbracket, allocator_data);
@@ -14092,7 +12644,7 @@ Returns: 0: success or (*NOJIT) was used
*/
#define PUBLIC_JIT_COMPILE_OPTIONS \
- (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
+ (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD)
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code *code, uint32_t options)
@@ -14107,7 +12659,6 @@ return PCRE2_ERROR_JIT_BADOPTION;
pcre2_real_code *re = (pcre2_real_code *)code;
executable_functions *functions;
-uint32_t excluded_options;
int result;
if (code == NULL)
@@ -14122,24 +12673,21 @@ functions = (executable_functions *)re->executable_jit;
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
|| functions->executable_funcs[0] == NULL)) {
- excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
- result = jit_compile(code, options & ~excluded_options);
+ result = jit_compile(code, PCRE2_JIT_COMPLETE);
if (result != 0)
return result;
}
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
|| functions->executable_funcs[1] == NULL)) {
- excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
- result = jit_compile(code, options & ~excluded_options);
+ result = jit_compile(code, PCRE2_JIT_PARTIAL_SOFT);
if (result != 0)
return result;
}
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
|| functions->executable_funcs[2] == NULL)) {
- excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
- result = jit_compile(code, options & ~excluded_options);
+ result = jit_compile(code, PCRE2_JIT_PARTIAL_HARD);
if (result != 0)
return result;
}
diff --git a/dist2/src/pcre2_jit_match.c b/dist2/src/pcre2_jit_match.c
index eee03864..5a66545b 100644
--- a/dist2/src/pcre2_jit_match.c
+++ b/dist2/src/pcre2_jit_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -152,6 +152,8 @@ else
jit_stack = NULL;
}
+/* JIT only need two offsets for each ovector entry. Hence
+ the last 1/3 of the ovector will never be touched. */
max_oveccount = functions->top_bracket;
if (oveccount > max_oveccount)
@@ -171,7 +173,7 @@ else
if (rc > (int)oveccount)
rc = 0;
match_data->code = re;
-match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
+match_data->subject = subject;
match_data->rc = rc;
match_data->startchar = arguments.startchar_ptr - subject;
match_data->leftchar = 0;
diff --git a/dist2/src/pcre2_jit_test.c b/dist2/src/pcre2_jit_test.c
index fa14329a..a28e9a0b 100644
--- a/dist2/src/pcre2_jit_test.c
+++ b/dist2/src/pcre2_jit_test.c
@@ -93,9 +93,6 @@ POSSIBILITY OF SUCH DAMAGE.
*/
static int regression_tests(void);
-static int invalid_utf8_regression_tests(void);
-static int invalid_utf16_regression_tests(void);
-static int invalid_utf32_regression_tests(void);
int main(void)
{
@@ -111,10 +108,7 @@ int main(void)
printf("JIT must be enabled to run pcre_jit_test\n");
return 1;
}
- return regression_tests()
- | invalid_utf8_regression_tests()
- | invalid_utf16_regression_tests()
- | invalid_utf32_regression_tests();
+ return regression_tests();
}
/* --------------------------------------------------------------------------------------- */
@@ -190,7 +184,7 @@ static struct regression_test_case regression_test_cases[] = {
{ CM, A, 0, 0, "\\Ca", "CDA" },
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
-#endif /* !NEVER_BACKSLASH_C */
+#endif
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
@@ -268,9 +262,6 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
- { CM, A, 0, 0, "ab|cd", "CD" },
- { CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
- { CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
/* Greedy and non-greedy ? operators. */
{ MU, A, 0, 0, "(?:a)?a", "laab" },
@@ -383,7 +374,6 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
- { CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
/* Unicode properties. */
{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
@@ -868,18 +858,6 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
-#ifdef SUPPORT_UNICODE
- /* Script runs and iterations. */
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
- { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
-#endif
-
/* Deep recursion. */
{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
@@ -1185,7 +1163,7 @@ static int regression_tests(void)
#elif defined SUPPORT_PCRE2_32
PCRE2_UCHAR32 cpu_info[128];
#endif
-#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
+#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
int return_value;
#endif
@@ -1353,8 +1331,9 @@ static int regression_tests(void)
ovector8_2[i] = -2;
}
if (re8) {
+ (void)pcre2_set_match_limit_8(mcontext8, 10000000);
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
- current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1397,8 +1376,9 @@ static int regression_tests(void)
else
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
+ (void)pcre2_set_match_limit_16(mcontext16, 10000000);
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
- current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1441,8 +1421,9 @@ static int regression_tests(void)
else
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
+ (void)pcre2_set_match_limit_32(mcontext32, 10000000);
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
- current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1470,7 +1451,7 @@ static int regression_tests(void)
is_successful = 1;
if (!(current->start_offset & F_DIFF)) {
-#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
+#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
if (!(current->start_offset & F_FORCECONV)) {
/* All results must be the same. */
@@ -1519,8 +1500,8 @@ static int regression_tests(void)
is_successful = 0;
} else
#endif
- if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
- if (return_value == PCRE2_ERROR_PARTIAL) {
+ if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
+ if (return_value == PCRE_ERROR_PARTIAL) {
return_value = 2;
} else {
return_value *= 2;
@@ -1535,20 +1516,20 @@ static int regression_tests(void)
return_value32[0] = return_value;
#endif
/* Transform back the results. */
- if (current->compile_options & PCRE2_UTF) {
+ if (current->flags & PCRE_UTF8) {
#ifdef SUPPORT_PCRE2_16
for (i = 0; i < return_value; ++i) {
- if (ovector16_1[i] != PCRE2_UNSET)
+ if (ovector16_1[i] >= 0)
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
- if (ovector16_2[i] != PCRE2_UNSET)
+ if (ovector16_2[i] >= 0)
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
}
#endif
#ifdef SUPPORT_PCRE2_32
for (i = 0; i < return_value; ++i) {
- if (ovector32_1[i] != PCRE2_UNSET)
+ if (ovector32_1[i] >= 0)
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
- if (ovector32_2[i] != PCRE2_UNSET)
+ if (ovector32_2[i] >= 0)
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
}
#endif
@@ -1558,7 +1539,7 @@ static int regression_tests(void)
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
- i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
+ i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
@@ -1566,7 +1547,7 @@ static int regression_tests(void)
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
- i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
+ i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
@@ -1574,7 +1555,7 @@ static int regression_tests(void)
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
- i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
+ i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
@@ -1770,696 +1751,4 @@ static int regression_tests(void)
}
}
-#if defined SUPPORT_UNICODE && (defined SUPPORT_PCRE2_8 || defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32)
-
-static int check_invalid_utf_result(int pattern_index, const char *type, int result,
- int match_start, int match_end, PCRE2_SIZE *ovector)
-{
- if (match_start < 0) {
- if (result != -1) {
- printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
- return 1;
- }
- return 0;
- }
-
- if (result <= 0) {
- printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
- return 1;
- }
-
- if (ovector[0] != (PCRE2_SIZE)match_start) {
- printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
- pattern_index, type, (int)ovector[0], match_start);
- return 1;
- }
-
- if (ovector[1] != (PCRE2_SIZE)match_end) {
- printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
- pattern_index, type, (int)ovector[1], match_end);
- return 1;
- }
-
- return 0;
-}
-
-#endif /* SUPPORT_UNICODE && (SUPPORT_PCRE2_8 || SUPPORT_PCRE2_16 || SUPPORT_PCRE2_32) */
-
-#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
-
-#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
-#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
-#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
-
-struct invalid_utf8_regression_test_case {
- int compile_options;
- int jit_compile_options;
- int start_offset;
- int skip_left;
- int skip_right;
- int match_start;
- int match_end;
- const char *pattern[2];
- const char *input;
-};
-
-static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
- { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
- { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
- { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
- { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
- { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
- { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
- { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
- { UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
- { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
- { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
- { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
- { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
- { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
- { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
- { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
- { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
- { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
- { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
- { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
-
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
- { UDA, CPI, 4, 1, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
- { UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
- { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
- { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
- { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
- { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
- { UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
-
- { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
- { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
- { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
- { UDA, CPI, 3, 1, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
- { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
- { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
- { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
- { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },
-
- { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
- { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
- { UDA, CPI, 2, 1, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
- { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
- { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80#" },
- { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff#" },
- { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf#" },
-
- { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
- { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
- { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
- { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
-
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
-
- { UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
- { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
- { UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
- { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
- { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
- { UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
- { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
- { UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
- { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
-
- { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
- { UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
- { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
- { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
-
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
-
- { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
- { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
- { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
- { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
- { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
- { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
-
- { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
- { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
- { PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
- { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
-
- /* These two are not invalid UTF tests, but this infrastructure fits better for them. */
- { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
- { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
-
- { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
-};
-
-#undef UDA
-#undef CI
-#undef CPI
-
-static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *current,
- int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
-{
- pcre2_code_8 *code;
- int result, errorcode;
- PCRE2_SIZE length, erroroffset;
- PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
-
- if (current->pattern[i] == NULL)
- return 1;
-
- code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
- current->compile_options, &errorcode, &erroroffset, ccontext);
-
- if (!code) {
- printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
- return 0;
- }
-
- if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
- printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
- pcre2_code_free_8(code);
- return 0;
- }
-
- length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
-
- if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
- result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
- length, current->start_offset - current->skip_left, 0, mdata, NULL);
-
- if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
- pcre2_code_free_8(code);
- return 0;
- }
- }
-
- if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
- result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
- length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
-
- if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
- pcre2_code_free_8(code);
- return 0;
- }
- }
-
- pcre2_code_free_8(code);
- return 1;
-}
-
-static int invalid_utf8_regression_tests(void)
-{
- struct invalid_utf8_regression_test_case *current;
- pcre2_compile_context_8 *ccontext;
- pcre2_match_data_8 *mdata;
- int total = 0, successful = 0;
- int result;
-
- printf("\nRunning invalid-utf8 JIT regression tests\n");
-
- ccontext = pcre2_compile_context_create_8(NULL);
- pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
- mdata = pcre2_match_data_create_8(4, NULL);
-
- for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
- /* printf("\nPattern: %s :\n", current->pattern); */
- total++;
-
- result = 1;
- if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
- result = 0;
- if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
- result = 0;
-
- if (result) {
- successful++;
- }
-
- printf(".");
- if ((total % 60) == 0)
- printf("\n");
- }
-
- if ((total % 60) != 0)
- printf("\n");
-
- pcre2_match_data_free_8(mdata);
- pcre2_compile_context_free_8(ccontext);
-
- if (total == successful) {
- printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
- return 0;
- } else {
- printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
- return 1;
- }
-}
-
-#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
-
-static int invalid_utf8_regression_tests(void)
-{
- return 0;
-}
-
-#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
-
-#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
-
-#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
-#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
-#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
-
-struct invalid_utf16_regression_test_case {
- int compile_options;
- int jit_compile_options;
- int start_offset;
- int skip_left;
- int skip_right;
- int match_start;
- int match_end;
- const PCRE2_UCHAR16 *pattern[2];
- const PCRE2_UCHAR16 *input;
-};
-
-static PCRE2_UCHAR16 allany16[] = { '.', 0 };
-static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
-static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
-static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
-static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
-static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
-static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
-static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
-static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
-static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, '#', 0 };
-static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, '#', 0 };
-static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, '#', 0 };
-static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, '#', 0 };
-static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
-static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
-static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
-static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
-static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
-static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
-static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
-
-static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
- { UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
- { UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
- { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
- { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
- { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
- { UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_2 },
- { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
- { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
- { UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_3 },
- { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
-
- { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
- { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
- { UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
- { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
- { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
- { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
- { UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
- { UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
- { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
- { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
-
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
- { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
-
- { UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
- { UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
- { UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
- { UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
- { UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
- { UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
-
- { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
- { UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
- { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
-
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
-
- { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
- { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
- { PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
- { PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
-
- { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
-};
-
-#undef UDA
-#undef CI
-#undef CPI
-
-static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *current,
- int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
-{
- pcre2_code_16 *code;
- int result, errorcode;
- PCRE2_SIZE length, erroroffset;
- const PCRE2_UCHAR16 *input;
- PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);
-
- if (current->pattern[i] == NULL)
- return 1;
-
- code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
- current->compile_options, &errorcode, &erroroffset, ccontext);
-
- if (!code) {
- printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
- return 0;
- }
-
- if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
- printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
- pcre2_code_free_16(code);
- return 0;
- }
-
- input = current->input;
- length = 0;
-
- while (*input++ != 0)
- length++;
-
- length -= current->skip_left + current->skip_right;
-
- if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
- result = pcre2_jit_match_16(code, (current->input + current->skip_left),
- length, current->start_offset - current->skip_left, 0, mdata, NULL);
-
- if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
- pcre2_code_free_16(code);
- return 0;
- }
- }
-
- if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
- result = pcre2_jit_match_16(code, (current->input + current->skip_left),
- length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
-
- if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
- pcre2_code_free_16(code);
- return 0;
- }
- }
-
- pcre2_code_free_16(code);
- return 1;
-}
-
-static int invalid_utf16_regression_tests(void)
-{
- struct invalid_utf16_regression_test_case *current;
- pcre2_compile_context_16 *ccontext;
- pcre2_match_data_16 *mdata;
- int total = 0, successful = 0;
- int result;
-
- printf("\nRunning invalid-utf16 JIT regression tests\n");
-
- ccontext = pcre2_compile_context_create_16(NULL);
- pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
- mdata = pcre2_match_data_create_16(4, NULL);
-
- for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
- /* printf("\nPattern: %s :\n", current->pattern); */
- total++;
-
- result = 1;
- if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
- result = 0;
- if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
- result = 0;
-
- if (result) {
- successful++;
- }
-
- printf(".");
- if ((total % 60) == 0)
- printf("\n");
- }
-
- if ((total % 60) != 0)
- printf("\n");
-
- pcre2_match_data_free_16(mdata);
- pcre2_compile_context_free_16(ccontext);
-
- if (total == successful) {
- printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
- return 0;
- } else {
- printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
- return 1;
- }
-}
-
-#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
-
-static int invalid_utf16_regression_tests(void)
-{
- return 0;
-}
-
-#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
-
-#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
-
-#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
-#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
-#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
-
-struct invalid_utf32_regression_test_case {
- int compile_options;
- int jit_compile_options;
- int start_offset;
- int skip_left;
- int skip_right;
- int match_start;
- int match_end;
- const PCRE2_UCHAR32 *pattern[2];
- const PCRE2_UCHAR32 *input;
-};
-
-static PCRE2_UCHAR32 allany32[] = { '.', 0 };
-static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
-static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
-static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
-static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
-static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
-static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
-static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x10ffff, 0 };
-static PCRE2_UCHAR32 test32_2[] = { 'a', 'A', 0x110000, 0 };
-static PCRE2_UCHAR32 test32_3[] = { '#', 0x10ffff, 0x110000, 0 };
-static PCRE2_UCHAR32 test32_4[] = { ' ', 0x2028, '#', 0 };
-static PCRE2_UCHAR32 test32_5[] = { ' ', 0x110000, 0x2028, '#', 0 };
-
-static struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
- { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
- { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
-
- { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
- { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
- { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
-
- { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_2 },
- { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_2 },
-
- { UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
- { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
-
- { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_3 },
- { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_3 },
- { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_3 },
-
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_4 },
- { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_5 },
-
- { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
-};
-
-#undef UDA
-#undef CI
-#undef CPI
-
-static int run_invalid_utf32_test(struct invalid_utf32_regression_test_case *current,
- int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
-{
- pcre2_code_32 *code;
- int result, errorcode;
- PCRE2_SIZE length, erroroffset;
- const PCRE2_UCHAR32 *input;
- PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);
-
- if (current->pattern[i] == NULL)
- return 1;
-
- code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
- current->compile_options, &errorcode, &erroroffset, ccontext);
-
- if (!code) {
- printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
- return 0;
- }
-
- if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
- printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
- pcre2_code_free_32(code);
- return 0;
- }
-
- input = current->input;
- length = 0;
-
- while (*input++ != 0)
- length++;
-
- length -= current->skip_left + current->skip_right;
-
- if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
- result = pcre2_jit_match_32(code, (current->input + current->skip_left),
- length, current->start_offset - current->skip_left, 0, mdata, NULL);
-
- if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
- pcre2_code_free_32(code);
- return 0;
- }
- }
-
- if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
- result = pcre2_jit_match_32(code, (current->input + current->skip_left),
- length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
-
- if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
- pcre2_code_free_32(code);
- return 0;
- }
- }
-
- pcre2_code_free_32(code);
- return 1;
-}
-
-static int invalid_utf32_regression_tests(void)
-{
- struct invalid_utf32_regression_test_case *current;
- pcre2_compile_context_32 *ccontext;
- pcre2_match_data_32 *mdata;
- int total = 0, successful = 0;
- int result;
-
- printf("\nRunning invalid-utf32 JIT regression tests\n");
-
- ccontext = pcre2_compile_context_create_32(NULL);
- pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
- mdata = pcre2_match_data_create_32(4, NULL);
-
- for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
- /* printf("\nPattern: %s :\n", current->pattern); */
- total++;
-
- result = 1;
- if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
- result = 0;
- if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
- result = 0;
-
- if (result) {
- successful++;
- }
-
- printf(".");
- if ((total % 60) == 0)
- printf("\n");
- }
-
- if ((total % 60) != 0)
- printf("\n");
-
- pcre2_match_data_free_32(mdata);
- pcre2_compile_context_free_32(ccontext);
-
- if (total == successful) {
- printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
- return 0;
- } else {
- printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
- return 1;
- }
-}
-
-#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
-
-static int invalid_utf32_regression_tests(void)
-{
- return 0;
-}
-
-#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
-
/* End of pcre2_jit_test.c */
diff --git a/dist2/src/pcre2_maketables.c b/dist2/src/pcre2_maketables.c
index 5921e907..537edba8 100644
--- a/dist2/src/pcre2_maketables.c
+++ b/dist2/src/pcre2_maketables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -114,17 +114,17 @@ test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
- if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
- if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
- if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
- if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
- if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
- if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
- if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
- if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
- if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
- if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
- if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
+ if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
+ if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
+ if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
+ if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
+ if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
+ if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
+ if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
+ if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
+ if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
+ if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
+ if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
}
p += cbit_length;
@@ -138,8 +138,8 @@ for (i = 0; i < 256; i++)
int x = 0;
if (isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
- if (islower(i)) x += ctype_lcletter;
if (isdigit(i)) x += ctype_digit;
+ if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
*p++ = x;
}
diff --git a/dist2/src/pcre2_match.c b/dist2/src/pcre2_match.c
index 419561fd..8741e143 100644
--- a/dist2/src/pcre2_match.c
+++ b/dist2/src/pcre2_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2015-2019 University of Cambridge
+ New API code Copyright (c) 2015-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -69,12 +69,11 @@ information, and fields within it. */
#define PUBLIC_MATCH_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
- PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
+ PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
#define PUBLIC_JIT_MATCH_OPTIONS \
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
- PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
- PCRE2_COPY_MATCHED_SUBJECT)
+ PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
/* Non-error returns from and within the match() function. Error returns are
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
@@ -1849,7 +1848,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
- if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
else
@@ -1871,7 +1870,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
#endif
- if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
@@ -1903,7 +1902,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
- if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
else
@@ -1928,7 +1927,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
#endif
- if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
@@ -1957,7 +1956,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (Fop == OP_CLASS) break;
}
else
- if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
Feptr += len;
}
@@ -1994,7 +1993,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
#endif
- if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
Feptr++;
}
@@ -4085,7 +4084,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHAR(fc, fptr);
}
lgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
Feptr = fptr;
rgb = lgb;
}
@@ -5015,7 +5014,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
must record a backtracking point and also set up a chained frame. */
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_SBRA:
Lframe_type = GF_NOCAPTURE | Fop;
@@ -5528,14 +5526,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case OP_ASSERTBACK_NOT:
RRETURN(MATCH_MATCH);
- /* At the end of a script run, apply the script-checking rules. This code
- will never by exercised if Unicode support it not compiled, because in
- that environment script runs cause an error at compile time. */
-
- case OP_SCRIPT_RUN:
- if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
- break;
-
/* Whole-pattern recursion is coded as a recurse into group 0, so it
won't be picked up here. Instead, we catch it when the OP_END is reached.
Other recursion is handled here. */
@@ -6010,10 +6000,9 @@ pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
pcre2_match_context *mcontext)
{
int rc;
-int was_zero_terminated = 0;
const uint8_t *start_bits = NULL;
-const pcre2_real_code *re = (const pcre2_real_code *)code;
+const pcre2_real_code *re = (const pcre2_real_code *)code;
BOOL anchored;
BOOL firstline;
@@ -6054,11 +6043,7 @@ mb->stack_frames = (heapframe *)stack_frames_vector;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
-if (length == PCRE2_ZERO_TERMINATED)
- {
- length = PRIV(strlen)(subject);
- was_zero_terminated = 1;
- }
+if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
end_subject = subject + length;
/* Plausibility checks */
@@ -6173,17 +6158,6 @@ if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
(re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
-/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
-free the memory that was obtained. Set the field to NULL for no match cases. */
-
-if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
- {
- match_data->memctl.free((void *)match_data->subject,
- match_data->memctl.memory_data);
- match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
- }
-match_data->subject = NULL;
-
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fallback to the normal code path if
@@ -6195,19 +6169,7 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
{
rc = pcre2_jit_match(code, subject, length, start_offset, options,
match_data, mcontext);
- if (rc != PCRE2_ERROR_JIT_BADOPTION)
- {
- if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
- {
- length = CU2BYTES(length + was_zero_terminated);
- match_data->subject = match_data->memctl.malloc(length,
- match_data->memctl.memory_data);
- if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
- memcpy((void *)match_data->subject, subject, length);
- match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
- }
- return rc;
- }
+ if (rc != PCRE2_ERROR_JIT_BADOPTION) return rc;
}
#endif
@@ -6459,7 +6421,7 @@ for(;;)
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 255) c = 255;
#endif
- ok = (start_bits[c/8] & (1u << (c&7))) != 0;
+ ok = (start_bits[c/8] & (1 << (c&7))) != 0;
}
}
if (!ok)
@@ -6576,7 +6538,7 @@ for(;;)
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 255) c = 255;
#endif
- if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
start_match++;
}
@@ -6847,13 +6809,13 @@ if (mb->match_frames != mb->stack_frames)
/* Fill in fields that are always returned in the match data. */
match_data->code = re;
+match_data->subject = subject;
match_data->mark = mb->mark;
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
/* Handle a fully successful match. Set the return code to the number of
captured strings, or 0 if there were too many to fit into the ovector, and then
-set the remaining returned values before returning. Make a copy of the subject
-string if requested. */
+set the remaining returned values before returning. */
if (rc == MATCH_MATCH)
{
@@ -6863,16 +6825,6 @@ if (rc == MATCH_MATCH)
match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
mb->last_used_ptr : mb->end_match_ptr) - subject;
- if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
- {
- length = CU2BYTES(length + was_zero_terminated);
- match_data->subject = match_data->memctl.malloc(length,
- match_data->memctl.memory_data);
- if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
- memcpy((void *)match_data->subject, subject, length);
- match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
- }
- else match_data->subject = subject;
return match_data->rc;
}
@@ -6886,14 +6838,10 @@ match_data->mark = mb->nomatch_mark;
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
-/* Handle a partial match. If a "soft" partial match was requested, searching
-for a complete match will have continued, and the value of rc at this point
-will be MATCH_NOMATCH. For a "hard" partial match, it will already be
-PCRE2_ERROR_PARTIAL. */
+/* Handle a partial match. */
else if (match_partial != NULL)
{
- match_data->subject = subject;
match_data->ovector[0] = match_partial - subject;
match_data->ovector[1] = end_subject - subject;
match_data->startchar = match_partial - subject;
diff --git a/dist2/src/pcre2_match_data.c b/dist2/src/pcre2_match_data.c
index ccc5f674..b297f326 100644
--- a/dist2/src/pcre2_match_data.c
+++ b/dist2/src/pcre2_match_data.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -63,7 +63,6 @@ yield = PRIV(memctl_malloc)(
(pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
-yield->flags = 0;
return yield;
}
@@ -94,12 +93,7 @@ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_data_free(pcre2_match_data *match_data)
{
if (match_data != NULL)
- {
- if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
- match_data->memctl.free((void *)match_data->subject,
- match_data->memctl.memory_data);
match_data->memctl.free(match_data, match_data->memctl.memory_data);
- }
}
diff --git a/dist2/src/pcre2_printint.c b/dist2/src/pcre2_printint.c
index b132d44f..bd10c6b1 100644
--- a/dist2/src/pcre2_printint.c
+++ b/dist2/src/pcre2_printint.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -393,7 +393,6 @@ for(;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
@@ -673,18 +672,17 @@ for(;;)
map = (uint8_t *)ccode;
if (invertmap)
{
- /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
- for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
+ for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
map = inverted_map;
}
for (i = 0; i < 256; i++)
{
- if ((map[i/8] & (1u << (i&7))) != 0)
+ if ((map[i/8] & (1 << (i&7))) != 0)
{
int j;
for (j = i+1; j < 256; j++)
- if ((map[j/8] & (1u << (j&7))) == 0) break;
+ if ((map[j/8] & (1 << (j&7))) == 0) break;
if (i == '-' || i == ']') fprintf(f, "\\");
if (PRINTABLE(i)) fprintf(f, "%c", i);
else fprintf(f, "\\x%02x", i);
diff --git a/dist2/src/pcre2_script_run.c b/dist2/src/pcre2_script_run.c
deleted file mode 100644
index 91a48330..00000000
--- a/dist2/src/pcre2_script_run.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
- Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* This module contains the function for checking a script run. */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "pcre2_internal.h"
-
-
-/*************************************************
-* Check script run *
-*************************************************/
-
-/* A script run is conceptually a sequence of characters all in the same
-Unicode script. However, it isn't quite that simple. There are special rules
-for scripts that are commonly used together, and also special rules for digits.
-This function implements the appropriate checks, which is possible only when
-PCRE2 is compiled with Unicode support. The function returns TRUE if there is
-no Unicode support; however, it should never be called in that circumstance
-because an error is given by pcre2_compile() if a script run is called for in a
-version of PCRE2 compiled without Unicode support.
-
-Arguments:
- pgr point to the first character
- endptr point after the last character
- utf TRUE if in UTF mode
-
-Returns: TRUE if this is a valid script run
-*/
-
-/* These dummy values must be less than the negation of the largest offset in
-the PRIV(ucd_script_sets) vector, which is held in a 16-bit field in UCD
-records (and is only likely to be a few hundred). */
-
-#define SCRIPT_UNSET (-99999)
-#define SCRIPT_HANPENDING (-99998)
-#define SCRIPT_HANHIRAKATA (-99997)
-#define SCRIPT_HANBOPOMOFO (-99996)
-#define SCRIPT_HANHANGUL (-99995)
-#define SCRIPT_LIST (-99994)
-
-#define INTERSECTION_LIST_SIZE 50
-
-BOOL
-PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
-{
-#ifdef SUPPORT_UNICODE
-int require_script = SCRIPT_UNSET;
-uint8_t intersection_list[INTERSECTION_LIST_SIZE];
-const uint8_t *require_list = NULL;
-uint32_t require_digitset = 0;
-uint32_t c;
-
-#if PCRE2_CODE_UNIT_WIDTH == 32
-(void)utf; /* Avoid compiler warning */
-#endif
-
-/* Any string containing fewer than 2 characters is a valid script run. */
-
-if (ptr >= endptr) return TRUE;
-GETCHARINCTEST(c, ptr);
-if (ptr >= endptr) return TRUE;
-
-/* Scan strings of two or more characters, checking the Unicode characteristics
-of each code point. We make use of the Script Extensions property. There is
-special code for scripts that can be combined with characters from the Han
-Chinese script. This may be used in conjunction with four other scripts in
-these combinations:
-
-. Han with Hiragana and Katakana is allowed (for Japanese).
-. Han with Bopomofo is allowed (for Taiwanese Mandarin).
-. Han with Hangul is allowed (for Korean).
-
-If the first significant character's script is one of the four, the required
-script type is immediately known. However, if the first significant
-character's script is Han, we have to keep checking for a non-Han character.
-Hence the SCRIPT_HANPENDING state. */
-
-for (;;)
- {
- const ucd_record *ucd = GET_UCD(c);
- int32_t scriptx = ucd->scriptx;
-
- /* If the script extension is Unknown, the string is not a valid script run.
- Such characters can only form script runs of length one. */
-
- if (scriptx == ucp_Unknown) return FALSE;
-
- /* A character whose script extension is Inherited is always accepted with
- any script, and plays no further part in this testing. A character whose
- script is Common is always accepted, but must still be tested for a digit
- below. The scriptx value at this point is non-zero, because zero is
- ucp_Unknown, tested for above. */
-
- if (scriptx != ucp_Inherited)
- {
- if (scriptx != ucp_Common)
- {
- /* If the script extension value is positive, the character is not a mark
- that can be used with many scripts. In the simple case we either set or
- compare with the required script. However, handling the scripts that can
- combine with Han are more complicated, as is the case when the previous
- characters have been man-script marks. */
-
- if (scriptx > 0)
- {
- switch(require_script)
- {
- /* Either the first significant character (require_script unset) or
- after only Han characters. */
-
- case SCRIPT_UNSET:
- case SCRIPT_HANPENDING:
- switch(scriptx)
- {
- case ucp_Han:
- require_script = SCRIPT_HANPENDING;
- break;
-
- case ucp_Hiragana:
- case ucp_Katakana:
- require_script = SCRIPT_HANHIRAKATA;
- break;
-
- case ucp_Bopomofo:
- require_script = SCRIPT_HANBOPOMOFO;
- break;
-
- case ucp_Hangul:
- require_script = SCRIPT_HANHANGUL;
- break;
-
- /* Not a Han-related script. If expecting one, fail. Otherise set
- the requirement to this script. */
-
- default:
- if (require_script == SCRIPT_HANPENDING) return FALSE;
- require_script = scriptx;
- break;
- }
- break;
-
- /* Previously encountered one of the "with Han" scripts. Check that
- this character is appropriate. */
-
- case SCRIPT_HANHIRAKATA:
- if (scriptx != ucp_Han && scriptx != ucp_Hiragana &&
- scriptx != ucp_Katakana)
- return FALSE;
- break;
-
- case SCRIPT_HANBOPOMOFO:
- if (scriptx != ucp_Han && scriptx != ucp_Bopomofo) return FALSE;
- break;
-
- case SCRIPT_HANHANGUL:
- if (scriptx != ucp_Han && scriptx != ucp_Hangul) return FALSE;
- break;
-
- /* We have a list of scripts to check that is derived from one or
- more previous characters. This is either one of the lists in
- ucd_script_sets[] (for one previous character) or the intersection of
- several lists for multiple characters. */
-
- case SCRIPT_LIST:
- {
- const uint8_t *list;
- for (list = require_list; *list != 0; list++)
- {
- if (*list == scriptx) break;
- }
- if (*list == 0) return FALSE;
- }
-
- /* The rest of the string must be in this script, but we have to
- allow for the Han complications. */
-
- switch(scriptx)
- {
- case ucp_Han:
- require_script = SCRIPT_HANPENDING;
- break;
-
- case ucp_Hiragana:
- case ucp_Katakana:
- require_script = SCRIPT_HANHIRAKATA;
- break;
-
- case ucp_Bopomofo:
- require_script = SCRIPT_HANBOPOMOFO;
- break;
-
- case ucp_Hangul:
- require_script = SCRIPT_HANHANGUL;
- break;
-
- default:
- require_script = scriptx;
- break;
- }
- break;
-
- /* This is the easy case when a single script is required. */
-
- default:
- if (scriptx != require_script) return FALSE;
- break;
- }
- } /* End of handing positive scriptx */
-
- /* If scriptx is negative, this character is a mark-type character that
- has a list of permitted scripts. */
-
- else
- {
- uint32_t chspecial;
- const uint8_t *clist, *rlist;
- const uint8_t *list = PRIV(ucd_script_sets) - scriptx;
-
- switch(require_script)
- {
- case SCRIPT_UNSET:
- require_list = PRIV(ucd_script_sets) - scriptx;
- require_script = SCRIPT_LIST;
- break;
-
- /* An inspection of the Unicode 11.0.0 files shows that there are the
- following types of Script Extension list that involve the Han,
- Bopomofo, Hiragana, Katakana, and Hangul scripts:
-
- . Bopomofo + Han
- . Han + Hiragana + Katakana
- . Hiragana + Katakana
- . Bopopmofo + Hangul + Han + Hiragana + Katakana
-
- The following code tries to make sense of this. */
-
-#define FOUND_BOPOMOFO 1
-#define FOUND_HIRAGANA 2
-#define FOUND_KATAKANA 4
-#define FOUND_HANGUL 8
-
- case SCRIPT_HANPENDING:
- chspecial = 0;
- for (; *list != 0; list++)
- {
- switch (*list)
- {
- case ucp_Bopomofo: chspecial |= FOUND_BOPOMOFO; break;
- case ucp_Hiragana: chspecial |= FOUND_HIRAGANA; break;
- case ucp_Katakana: chspecial |= FOUND_KATAKANA; break;
- case ucp_Hangul: chspecial |= FOUND_HANGUL; break;
- default: break;
- }
- }
-
- if (chspecial == 0) return FALSE;
-
- if (chspecial == FOUND_BOPOMOFO)
- {
- require_script = SCRIPT_HANBOPOMOFO;
- }
- else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
- {
- require_script = SCRIPT_HANHIRAKATA;
- }
-
- /* Otherwise it must be allowed with all of them, so remain in
- the pending state. */
-
- break;
-
- case SCRIPT_HANHIRAKATA:
- for (; *list != 0; list++)
- {
- if (*list == ucp_Hiragana || *list == ucp_Katakana) break;
- }
- if (*list == 0) return FALSE;
- break;
-
- case SCRIPT_HANBOPOMOFO:
- for (; *list != 0; list++)
- {
- if (*list == ucp_Bopomofo) break;
- }
- if (*list == 0) return FALSE;
- break;
-
- case SCRIPT_HANHANGUL:
- for (; *list != 0; list++)
- {
- if (*list == ucp_Hangul) break;
- }
- if (*list == 0) return FALSE;
- break;
-
- /* Previously encountered one or more characters that are allowed
- with a list of scripts. Build the intersection of the required list
- with this character's list in intersection_list[]. This code is
- written so that it still works OK if the required list is already in
- that vector. */
-
- case SCRIPT_LIST:
- {
- int i = 0;
- for (rlist = require_list; *rlist != 0; rlist++)
- {
- for (clist = list; *clist != 0; clist++)
- {
- if (*rlist == *clist)
- {
- intersection_list[i++] = *rlist;
- break;
- }
- }
- }
- if (i == 0) return FALSE; /* No scripts in common */
-
- /* If there's just one script in common, we can set it as the
- unique required script. Otherwise, terminate the intersection list
- and make it the required list. */
-
- if (i == 1)
- {
- require_script = intersection_list[0];
- }
- else
- {
- intersection_list[i] = 0;
- require_list = intersection_list;
- }
- }
- break;
-
- /* The previously set required script is a single script, not
- Han-related. Check that it is in this character's list. */
-
- default:
- for (; *list != 0; list++)
- {
- if (*list == require_script) break;
- }
- if (*list == 0) return FALSE;
- break;
- }
- } /* End of handling negative scriptx */
- } /* End of checking non-Common character */
-
- /* The character is in an acceptable script. We must now ensure that all
- decimal digits in the string come from the same set. Some scripts (e.g.
- Common, Arabic) have more than one set of decimal digits. This code does
- not allow mixing sets, even within the same script. The vector called
- PRIV(ucd_digit_sets)[] contains, in its first element, the number of
- following elements, and then, in ascending order, the code points of the
- '9' characters in every set of 10 digits. Each set is identified by the
- offset in the vector of its '9' character. An initial check of the first
- value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
-
- if (ucd->chartype == ucp_Nd)
- {
- uint32_t digitset;
-
- if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
- {
- int mid;
- int bot = 1;
- int top = PRIV(ucd_digit_sets)[0];
- for (;;)
- {
- if (top <= bot + 1) /* <= rather than == is paranoia */
- {
- digitset = top;
- break;
- }
- mid = (top + bot) / 2;
- if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
- }
- }
-
- /* A required value of 0 means "unset". */
-
- if (require_digitset == 0) require_digitset = digitset;
- else if (digitset != require_digitset) return FALSE;
- } /* End digit handling */
- } /* End checking non-Inherited character */
-
- /* If we haven't yet got to the end, pick up the next character. */
-
- if (ptr >= endptr) return TRUE;
- GETCHARINCTEST(c, ptr);
- } /* End checking loop */
-
-#else /* NOT SUPPORT_UNICODE */
-(void)ptr;
-(void)endptr;
-(void)utf;
-return TRUE;
-#endif /* SUPPORT_UNICODE */
-}
-
-/* End of pcre2_script_run.c */
diff --git a/dist2/src/pcre2_study.c b/dist2/src/pcre2_study.c
index e883c2eb..acbf98b4 100644
--- a/dist2/src/pcre2_study.c
+++ b/dist2/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -54,7 +54,7 @@ collecting data (e.g. minimum matching length). */
/* Set a bit in the starting code unit bit map. */
-#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1u << ((c)&7))
+#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1 << ((c)&7))
/* Returns from set_start_bits() */
@@ -171,7 +171,6 @@ for (;;)
/* Fall through */
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_SBRA:
case OP_BRAPOS:
case OP_SBRAPOS:
@@ -843,7 +842,7 @@ for (c = 0; c < table_limit; c++)
if (table_limit == 32) return;
for (c = 128; c < 256; c++)
{
- if ((re->tables[cbits_offset + c/8] & (1u << (c&7))) != 0)
+ if ((re->tables[cbits_offset + c/8] & (1 << (c&7))) != 0)
{
PCRE2_UCHAR buff[6];
(void)PRIV(ord2utf)(c, buff);
@@ -1076,7 +1075,6 @@ do
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_ONCE:
- case OP_SCRIPT_RUN:
case OP_ASSERT:
rc = set_start_bits(re, tcode, utf);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
@@ -1507,11 +1505,11 @@ do
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
for (c = 128; c < 256; c++)
{
- if ((classmap[c/8] & (1u << (c&7))) != 0)
+ if ((classmap[c/8] & (1 << (c&7))) != 0)
{
- int d = (c >> 6) | 0xc0; /* Set bit for this starter */
- re->start_bitmap[d/8] |= (1u << (d&7)); /* and then skip on to the */
- c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
+ int d = (c >> 6) | 0xc0; /* Set bit for this starter */
+ re->start_bitmap[d/8] |= (1 << (d&7)); /* and then skip on to the */
+ c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
}
}
}
diff --git a/dist2/src/pcre2_substitute.c b/dist2/src/pcre2_substitute.c
index ec3dd66d..ab8d1090 100644
--- a/dist2/src/pcre2_substitute.c
+++ b/dist2/src/pcre2_substitute.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -129,7 +129,7 @@ for (; ptr < ptrend; ptr++)
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, FALSE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
@@ -239,17 +239,13 @@ PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[3];
-pcre2_substitute_callout_block scb;
-
-/* General initialization */
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
-/* Partial matching is not valid. This must come after setting *blength to
-PCRE2_UNSET, so as not to imply an offset in the replacement. */
+/* Partial matching is not valid. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
@@ -268,13 +264,6 @@ if (match_data == NULL)
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);
-/* Fixed things in the callout block */
-
-scb.version = 0;
-scb.input = subject;
-scb.output = (PCRE2_SPTR)buffer;
-scb.ovector = ovector;
-
/* Find lengths of zero-terminated strings and the end of the replacement. */
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
@@ -401,7 +390,7 @@ do
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
goto EXIT;
}
-
+
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@@ -412,14 +401,11 @@ do
}
subs++;
- /* Copy the text leading up to the match, and remember where the insert
- begins and how many ovector pairs are set. */
+ /* Copy the text leading up to the match. */
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
- scb.output_offsets[0] = buff_offset;
- scb.oveccount = rc;
/* Process the replacement string. Literal mode is set by \Q, but only in
extended mode when backslashes are being interpreted. In extended mode we
@@ -435,7 +421,7 @@ do
if (ptr >= repend)
{
- if (ptrstackptr == 0) break; /* End of replacement string */
+ if (ptrstackptr <= 0) break; /* End of replacement string */
repend = ptrstack[--ptrstackptr];
ptr = ptrstack[--ptrstackptr];
continue;
@@ -716,7 +702,7 @@ do
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
- )[ch/8] & (1u << (ch%8))) == 0)
+ )[ch/8] & (1 << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
@@ -774,7 +760,7 @@ do
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, FALSE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
@@ -818,7 +804,7 @@ do
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
- )[ch/8] & (1u << (ch%8))) == 0)
+ )[ch/8] & (1 << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
@@ -835,37 +821,10 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
- /* The replacement has been copied to the output, or its size has been
- remembered. Do the callout if there is one and we have done an actual
- replacement. */
-
- if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
- {
- scb.subscount = subs;
- scb.output_offsets[1] = buff_offset;
- rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
-
- /* A non-zero return means cancel this substitution. Instead, copy the
- matched string fragment. */
-
- if (rc != 0)
- {
- PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
- PCRE2_SIZE oldlength = ovector[1] - ovector[0];
-
- buff_offset -= newlength;
- lengthleft += newlength;
- CHECKMEMCPY(subject + ovector[0], oldlength);
-
- /* A negative return means do not do any more. */
-
- if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
- }
- }
-
- /* Save the details of this match. See above for how this data is used. If we
- matched an empty string, do the magic for global matches. Finally, update the
- start offset to point to the rest of the subject string. */
+ /* The replacement has been copied to the output. Save the details of this
+ match. See above for how this data is used. If we matched an empty string, do
+ the magic for global matches. Finally, update the start offset to point to
+ the rest of the subject string. */
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];
diff --git a/dist2/src/pcre2_tables.c b/dist2/src/pcre2_tables.c
index 84019361..83d6f9de 100644
--- a/dist2/src/pcre2_tables.c
+++ b/dist2/src/pcre2_tables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -142,7 +142,7 @@ ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
code points. The left property selects a word from the table, and the right
property selects a bit from that word like this:
- PRIV(ucp_gbtable)[left-property] & (1u << right-property)
+ PRIV(ucp_gbtable)[left-property] & (1 << right-property)
The value is non-zero if a grapheme break is NOT permitted between the relevant
two code points. The breaking rules are as follows:
@@ -183,25 +183,25 @@ are implementing).
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
const uint32_t PRIV(ucp_gbtable)[] = {
- (1u<<ucp_gbLF), /* 0 CR */
- 0, /* 1 LF */
- 0, /* 2 Control */
- ESZ, /* 3 Extend */
- ESZ|(1u<<ucp_gbPrepend)| /* 4 Prepend */
- (1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
- (1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
- (1u<<ucp_gbRegionalIndicator),
- ESZ, /* 5 SpacingMark */
- ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)| /* 6 L */
- (1u<<ucp_gbLVT),
- ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 7 V */
- ESZ|(1u<<ucp_gbT), /* 8 T */
- ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 9 LV */
- ESZ|(1u<<ucp_gbT), /* 10 LVT */
- (1u<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
- ESZ, /* 12 Other */
- ESZ, /* 13 ZWJ */
- ESZ|(1u<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
+ (1<<ucp_gbLF), /* 0 CR */
+ 0, /* 1 LF */
+ 0, /* 2 Control */
+ ESZ, /* 3 Extend */
+ ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
+ (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
+ (1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
+ (1<<ucp_gbRegionalIndicator),
+ ESZ, /* 5 SpacingMark */
+ ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
+ (1<<ucp_gbLVT),
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
+ ESZ|(1<<ucp_gbT), /* 8 T */
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
+ ESZ|(1<<ucp_gbT), /* 10 LVT */
+ (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
+ ESZ, /* 12 Other */
+ ESZ, /* 13 ZWJ */
+ ESZ|(1<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
};
#undef ESZ
@@ -417,7 +417,6 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
-#define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0"
#define STRING_Vai0 STR_V STR_a STR_i "\0"
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
#define STRING_Xan0 STR_X STR_a STR_n "\0"
@@ -612,7 +611,6 @@ const char PRIV(utt_names)[] =
STRING_Tifinagh0
STRING_Tirhuta0
STRING_Ugaritic0
- STRING_Unknown0
STRING_Vai0
STRING_Warang_Citi0
STRING_Xan0
@@ -807,20 +805,19 @@ const ucp_type_table PRIV(utt)[] = {
{ 1424, PT_SC, ucp_Tifinagh },
{ 1433, PT_SC, ucp_Tirhuta },
{ 1441, PT_SC, ucp_Ugaritic },
- { 1450, PT_SC, ucp_Unknown },
- { 1458, PT_SC, ucp_Vai },
- { 1462, PT_SC, ucp_Warang_Citi },
- { 1474, PT_ALNUM, 0 },
- { 1478, PT_PXSPACE, 0 },
- { 1482, PT_SPACE, 0 },
- { 1486, PT_UCNC, 0 },
- { 1490, PT_WORD, 0 },
- { 1494, PT_SC, ucp_Yi },
- { 1497, PT_GC, ucp_Z },
- { 1499, PT_SC, ucp_Zanabazar_Square },
- { 1516, PT_PC, ucp_Zl },
- { 1519, PT_PC, ucp_Zp },
- { 1522, PT_PC, ucp_Zs }
+ { 1450, PT_SC, ucp_Vai },
+ { 1454, PT_SC, ucp_Warang_Citi },
+ { 1466, PT_ALNUM, 0 },
+ { 1470, PT_PXSPACE, 0 },
+ { 1474, PT_SPACE, 0 },
+ { 1478, PT_UCNC, 0 },
+ { 1482, PT_WORD, 0 },
+ { 1486, PT_SC, ucp_Yi },
+ { 1489, PT_GC, ucp_Z },
+ { 1491, PT_SC, ucp_Zanabazar_Square },
+ { 1508, PT_PC, ucp_Zl },
+ { 1511, PT_PC, ucp_Zp },
+ { 1514, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/dist2/src/pcre2_ucd.c b/dist2/src/pcre2_ucd.c
index cc53c240..275a4be2 100644
--- a/dist2/src/pcre2_ucd.c
+++ b/dist2/src/pcre2_ucd.c
@@ -20,7 +20,7 @@ needed. */
/* Unicode character database. */
/* This file was autogenerated by the MultiStage2.py script. */
-/* Total size: 97152 bytes, block size: 128. */
+/* Total size: 92592 bytes, block size: 128. */
/* The tables herein are needed only when UCP support is built,
and in PCRE2 that happens automatically with UTF support.
@@ -30,10 +30,10 @@ a comment was received about space saving - maybe the guy linked
all the modules rather than using a library - so we include a
condition to cut out the tables when not needed. But don't leave
a totally empty module because some compilers barf at that.
-Instead, just supply some small dummy tables. */
+Instead, just supply small dummy tables. */
#ifndef SUPPORT_UNICODE
-const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }};
+const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
const uint16_t PRIV(ucd_stage1)[] = {0};
const uint16_t PRIV(ucd_stage2)[] = {0};
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
@@ -47,13 +47,11 @@ special record. */
#if PCRE2_CODE_UNIT_WIDTH == 32
const ucd_record PRIV(dummy_ucd_record)[] = {{
- ucp_Unknown, /* script */
- ucp_Cn, /* type unassigned */
- ucp_gbOther, /* grapheme break property */
- 0, /* case set */
- 0, /* other case */
- ucp_Unknown, /* script extension */
- 0, /* dummy filler */
+ ucp_Common, /* script */
+ ucp_Cn, /* type unassigned */
+ ucp_gbOther, /* grapheme break property */
+ 0, /* case set */
+ 0, /* other case */
}};
#endif
@@ -67,13 +65,9 @@ uint8_t property_1;
uint8_t property_2;
uint8_t property_3;
pcre_int32 property_4;
-pcre_int16 property_5;
-uint16_t property_6;
} ucd_record;
*/
-/* This table contains lists of characters that are caseless sets of
-more than one character. Each list is terminated by NOTACHAR. */
const uint32_t PRIV(ucd_caseless_sets)[] = {
NOTACHAR,
@@ -106,1014 +100,865 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {
0x1c88, 0xa64a, 0xa64b, NOTACHAR,
};
-/* When #included in pcre2test, we don't need the table of digit
-sets, nor the the large main UCD tables. */
+/* When #included in pcre2test, we don't need this large table. */
#ifndef PCRE2_PCRE2TEST
-/* This table lists the code points for the '9' characters in each
-set of decimal digits. It is used to ensure that all the digits in
-a script run come from the same set. */
-
-const uint32_t PRIV(ucd_digit_sets)[] = {
- 61, /* Number of subsequent values */
- 0x00039, 0x00669, 0x006f9, 0x007c9, 0x0096f, 0x009ef, 0x00a6f, 0x00aef,
- 0x00b6f, 0x00bef, 0x00c6f, 0x00cef, 0x00d6f, 0x00def, 0x00e59, 0x00ed9,
- 0x00f29, 0x01049, 0x01099, 0x017e9, 0x01819, 0x0194f, 0x019d9, 0x01a89,
- 0x01a99, 0x01b59, 0x01bb9, 0x01c49, 0x01c59, 0x0a629, 0x0a8d9, 0x0a909,
- 0x0a9d9, 0x0a9f9, 0x0aa59, 0x0abf9, 0x0ff19, 0x104a9, 0x10d39, 0x1106f,
- 0x110f9, 0x1113f, 0x111d9, 0x112f9, 0x11459, 0x114d9, 0x11659, 0x116c9,
- 0x11739, 0x118e9, 0x11c59, 0x11d59, 0x11da9, 0x16a69, 0x16b59, 0x1d7d7,
- 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e959,
-};
-
-/* This vector is a list of lists of scripts for the Script Extension
-property. Each sublist is zero-terminated. */
-
-const uint8_t PRIV(ucd_script_sets)[] = {
- /* 0 */ 0,
- /* 1 */ 1, 11, 0,
- /* 4 */ 1, 144, 0,
- /* 7 */ 1, 50, 0,
- /* 10 */ 1, 56, 0,
- /* 13 */ 2, 17, 0,
- /* 16 */ 3, 15, 0,
- /* 19 */ 4, 23, 0,
- /* 22 */ 6, 84, 0,
- /* 25 */ 12, 36, 0,
- /* 28 */ 13, 18, 0,
- /* 31 */ 13, 34, 0,
- /* 34 */ 13, 118, 0,
- /* 37 */ 15, 107, 0,
- /* 40 */ 15, 100, 0,
- /* 43 */ 15, 54, 0,
- /* 46 */ 17, 34, 0,
- /* 49 */ 107, 54, 0,
- /* 52 */ 21, 108, 0,
- /* 55 */ 22, 129, 0,
- /* 58 */ 27, 30, 0,
- /* 61 */ 38, 65, 0,
- /* 64 */ 1, 50, 56, 0,
- /* 68 */ 3, 96, 49, 0,
- /* 72 */ 96, 39, 53, 0,
- /* 76 */ 12, 110, 36, 0,
- /* 80 */ 15, 107, 29, 0,
- /* 84 */ 15, 107, 34, 0,
- /* 88 */ 23, 27, 30, 0,
- /* 92 */ 69, 34, 39, 0,
- /* 96 */ 1, 144, 50, 56, 0,
- /* 101 */ 3, 15, 107, 29, 0,
- /* 106 */ 7, 25, 52, 51, 0,
- /* 111 */ 15, 142, 85, 111, 0,
- /* 116 */ 4, 24, 23, 27, 30, 0,
- /* 122 */ 4, 24, 23, 27, 30, 61, 0,
- /* 129 */ 15, 29, 37, 44, 54, 55, 0,
- /* 136 */ 132, 1, 95, 112, 121, 144, 148, 50, 0,
- /* 145 */ 15, 142, 21, 22, 108, 85, 111, 114, 109, 102, 124, 0,
- /* 157 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 54, 55, 124, 0,
- /* 170 */ 15, 142, 21, 22, 108, 29, 85, 111, 114, 109, 102, 124, 0,
- /* 183 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 100, 54, 55, 124, 0,
- /* 197 */ 15, 142, 21, 22, 108, 29, 85, 111, 37, 114, 109, 102, 124, 0,
- /* 211 */ 3, 15, 142, 143, 107, 21, 22, 29, 111, 37, 44, 109, 48, 49, 102, 54, 55, 124, 0,
- /* 230 */ 3, 15, 142, 143, 107, 21, 22, 29, 35, 111, 37, 44, 109, 48, 49, 102, 54, 55, 124, 0,
- /* 250 */
-};
-
-/* These are the main two-stage UCD tables. The fields in each record are:
-script (8 bits), character type (8 bits), grapheme break property (8 bits),
-offset to multichar other cases or zero (8 bits), offset to other case
-or zero (32 bits, signed), script extension (16 bits, signed), and a dummy
-16-bit field to make the whole thing a multiple of 4 bytes. */
-
-const ucd_record PRIV(ucd_records)[] = { /* 11136 bytes, record size 12 */
- { 10, 0, 2, 0, 0, 10, 256, }, /* 0 */
- { 10, 0, 2, 0, 0, 10, 0, }, /* 1 */
- { 10, 0, 1, 0, 0, 10, 0, }, /* 2 */
- { 10, 0, 0, 0, 0, 10, 0, }, /* 3 */
- { 10, 29, 12, 0, 0, 10, 0, }, /* 4 */
- { 10, 21, 12, 0, 0, 10, 0, }, /* 5 */
- { 10, 23, 12, 0, 0, 10, 0, }, /* 6 */
- { 10, 22, 12, 0, 0, 10, 0, }, /* 7 */
- { 10, 18, 12, 0, 0, 10, 0, }, /* 8 */
- { 10, 25, 12, 0, 0, 10, 0, }, /* 9 */
- { 10, 17, 12, 0, 0, 10, 0, }, /* 10 */
- { 10, 13, 12, 0, 0, 10, 0, }, /* 11 */
- { 34, 9, 12, 0, 32, 34, 0, }, /* 12 */
- { 34, 9, 12, 100, 32, 34, 0, }, /* 13 */
- { 34, 9, 12, 1, 32, 34, 0, }, /* 14 */
- { 10, 24, 12, 0, 0, 10, 0, }, /* 15 */
- { 10, 16, 12, 0, 0, 10, 0, }, /* 16 */
- { 34, 5, 12, 0, -32, 34, 0, }, /* 17 */
- { 34, 5, 12, 100, -32, 34, 0, }, /* 18 */
- { 34, 5, 12, 1, -32, 34, 0, }, /* 19 */
- { 10, 26, 12, 0, 0, 10, 0, }, /* 20 */
- { 10, 26, 14, 0, 0, 10, 0, }, /* 21 */
- { 34, 7, 12, 0, 0, 34, 0, }, /* 22 */
- { 10, 20, 12, 0, 0, 10, 0, }, /* 23 */
- { 10, 1, 2, 0, 0, 10, 0, }, /* 24 */
- { 10, 15, 12, 0, 0, 10, 0, }, /* 25 */
- { 10, 5, 12, 26, 775, 10, 0, }, /* 26 */
- { 10, 19, 12, 0, 0, 10, 0, }, /* 27 */
- { 34, 9, 12, 104, 32, 34, 0, }, /* 28 */
- { 34, 5, 12, 0, 7615, 34, 0, }, /* 29 */
- { 34, 5, 12, 104, -32, 34, 0, }, /* 30 */
- { 34, 5, 12, 0, 121, 34, 0, }, /* 31 */
- { 34, 9, 12, 0, 1, 34, 0, }, /* 32 */
- { 34, 5, 12, 0, -1, 34, 0, }, /* 33 */
- { 34, 9, 12, 0, 0, 34, 0, }, /* 34 */
- { 34, 5, 12, 0, 0, 34, 0, }, /* 35 */
- { 34, 9, 12, 0, -121, 34, 0, }, /* 36 */
- { 34, 5, 12, 1, -268, 34, 0, }, /* 37 */
- { 34, 5, 12, 0, 195, 34, 0, }, /* 38 */
- { 34, 9, 12, 0, 210, 34, 0, }, /* 39 */
- { 34, 9, 12, 0, 206, 34, 0, }, /* 40 */
- { 34, 9, 12, 0, 205, 34, 0, }, /* 41 */
- { 34, 9, 12, 0, 79, 34, 0, }, /* 42 */
- { 34, 9, 12, 0, 202, 34, 0, }, /* 43 */
- { 34, 9, 12, 0, 203, 34, 0, }, /* 44 */
- { 34, 9, 12, 0, 207, 34, 0, }, /* 45 */
- { 34, 5, 12, 0, 97, 34, 0, }, /* 46 */
- { 34, 9, 12, 0, 211, 34, 0, }, /* 47 */
- { 34, 9, 12, 0, 209, 34, 0, }, /* 48 */
- { 34, 5, 12, 0, 163, 34, 0, }, /* 49 */
- { 34, 9, 12, 0, 213, 34, 0, }, /* 50 */
- { 34, 5, 12, 0, 130, 34, 0, }, /* 51 */
- { 34, 9, 12, 0, 214, 34, 0, }, /* 52 */
- { 34, 9, 12, 0, 218, 34, 0, }, /* 53 */
- { 34, 9, 12, 0, 217, 34, 0, }, /* 54 */
- { 34, 9, 12, 0, 219, 34, 0, }, /* 55 */
- { 34, 5, 12, 0, 56, 34, 0, }, /* 56 */
- { 34, 9, 12, 5, 2, 34, 0, }, /* 57 */
- { 34, 8, 12, 5, 1, 34, 0, }, /* 58 */
- { 34, 5, 12, 5, -2, 34, 0, }, /* 59 */
- { 34, 9, 12, 9, 2, 34, 0, }, /* 60 */
- { 34, 8, 12, 9, 1, 34, 0, }, /* 61 */
- { 34, 5, 12, 9, -2, 34, 0, }, /* 62 */
- { 34, 9, 12, 13, 2, 34, 0, }, /* 63 */
- { 34, 8, 12, 13, 1, 34, 0, }, /* 64 */
- { 34, 5, 12, 13, -2, 34, 0, }, /* 65 */
- { 34, 5, 12, 0, -79, 34, 0, }, /* 66 */
- { 34, 9, 12, 17, 2, 34, 0, }, /* 67 */
- { 34, 8, 12, 17, 1, 34, 0, }, /* 68 */
- { 34, 5, 12, 17, -2, 34, 0, }, /* 69 */
- { 34, 9, 12, 0, -97, 34, 0, }, /* 70 */
- { 34, 9, 12, 0, -56, 34, 0, }, /* 71 */
- { 34, 9, 12, 0, -130, 34, 0, }, /* 72 */
- { 34, 9, 12, 0, 10795, 34, 0, }, /* 73 */
- { 34, 9, 12, 0, -163, 34, 0, }, /* 74 */
- { 34, 9, 12, 0, 10792, 34, 0, }, /* 75 */
- { 34, 5, 12, 0, 10815, 34, 0, }, /* 76 */
- { 34, 9, 12, 0, -195, 34, 0, }, /* 77 */
- { 34, 9, 12, 0, 69, 34, 0, }, /* 78 */
- { 34, 9, 12, 0, 71, 34, 0, }, /* 79 */
- { 34, 5, 12, 0, 10783, 34, 0, }, /* 80 */
- { 34, 5, 12, 0, 10780, 34, 0, }, /* 81 */
- { 34, 5, 12, 0, 10782, 34, 0, }, /* 82 */
- { 34, 5, 12, 0, -210, 34, 0, }, /* 83 */
- { 34, 5, 12, 0, -206, 34, 0, }, /* 84 */
- { 34, 5, 12, 0, -205, 34, 0, }, /* 85 */
- { 34, 5, 12, 0, -202, 34, 0, }, /* 86 */
- { 34, 5, 12, 0, -203, 34, 0, }, /* 87 */
- { 34, 5, 12, 0, 42319, 34, 0, }, /* 88 */
- { 34, 5, 12, 0, 42315, 34, 0, }, /* 89 */
- { 34, 5, 12, 0, -207, 34, 0, }, /* 90 */
- { 34, 5, 12, 0, 42280, 34, 0, }, /* 91 */
- { 34, 5, 12, 0, 42308, 34, 0, }, /* 92 */
- { 34, 5, 12, 0, -209, 34, 0, }, /* 93 */
- { 34, 5, 12, 0, -211, 34, 0, }, /* 94 */
- { 34, 5, 12, 0, 10743, 34, 0, }, /* 95 */
- { 34, 5, 12, 0, 42305, 34, 0, }, /* 96 */
- { 34, 5, 12, 0, 10749, 34, 0, }, /* 97 */
- { 34, 5, 12, 0, -213, 34, 0, }, /* 98 */
- { 34, 5, 12, 0, -214, 34, 0, }, /* 99 */
- { 34, 5, 12, 0, 10727, 34, 0, }, /* 100 */
- { 34, 5, 12, 0, -218, 34, 0, }, /* 101 */
- { 34, 5, 12, 0, 42282, 34, 0, }, /* 102 */
- { 34, 5, 12, 0, -69, 34, 0, }, /* 103 */
- { 34, 5, 12, 0, -217, 34, 0, }, /* 104 */
- { 34, 5, 12, 0, -71, 34, 0, }, /* 105 */
- { 34, 5, 12, 0, -219, 34, 0, }, /* 106 */
- { 34, 5, 12, 0, 42261, 34, 0, }, /* 107 */
- { 34, 5, 12, 0, 42258, 34, 0, }, /* 108 */
- { 34, 6, 12, 0, 0, 34, 0, }, /* 109 */
- { 10, 6, 12, 0, 0, 10, 0, }, /* 110 */
- { 4, 24, 12, 0, 0, 4, 0, }, /* 111 */
- { 28, 12, 3, 0, 0, 28, 0, }, /* 112 */
- { 28, 12, 3, 0, 0, 20, 0, }, /* 113 */
- { 28, 12, 3, 21, 116, 20, 0, }, /* 114 */
- { 28, 12, 3, 0, 0, 34, 0, }, /* 115 */
- { 20, 9, 12, 0, 1, 20, 0, }, /* 116 */
- { 20, 5, 12, 0, -1, 20, 0, }, /* 117 */
- { 20, 24, 12, 0, 0, 20, 0, }, /* 118 */
- { 0, 2, 12, 0, 0, 0, 0, }, /* 119 */
- { 20, 6, 12, 0, 0, 20, 0, }, /* 120 */
- { 20, 5, 12, 0, 130, 20, 0, }, /* 121 */
- { 20, 9, 12, 0, 116, 20, 0, }, /* 122 */
- { 20, 9, 12, 0, 38, 20, 0, }, /* 123 */
- { 20, 9, 12, 0, 37, 20, 0, }, /* 124 */
- { 20, 9, 12, 0, 64, 20, 0, }, /* 125 */
- { 20, 9, 12, 0, 63, 20, 0, }, /* 126 */
- { 20, 5, 12, 0, 0, 20, 0, }, /* 127 */
- { 20, 9, 12, 0, 32, 20, 0, }, /* 128 */
- { 20, 9, 12, 34, 32, 20, 0, }, /* 129 */
- { 20, 9, 12, 59, 32, 20, 0, }, /* 130 */
- { 20, 9, 12, 38, 32, 20, 0, }, /* 131 */
- { 20, 9, 12, 21, 32, 20, 0, }, /* 132 */
- { 20, 9, 12, 51, 32, 20, 0, }, /* 133 */
- { 20, 9, 12, 26, 32, 20, 0, }, /* 134 */
- { 20, 9, 12, 47, 32, 20, 0, }, /* 135 */
- { 20, 9, 12, 55, 32, 20, 0, }, /* 136 */
- { 20, 9, 12, 30, 32, 20, 0, }, /* 137 */
- { 20, 9, 12, 43, 32, 20, 0, }, /* 138 */
- { 20, 9, 12, 96, 32, 20, 0, }, /* 139 */
- { 20, 5, 12, 0, -38, 20, 0, }, /* 140 */
- { 20, 5, 12, 0, -37, 20, 0, }, /* 141 */
- { 20, 5, 12, 0, -32, 20, 0, }, /* 142 */
- { 20, 5, 12, 34, -32, 20, 0, }, /* 143 */
- { 20, 5, 12, 59, -32, 20, 0, }, /* 144 */
- { 20, 5, 12, 38, -32, 20, 0, }, /* 145 */
- { 20, 5, 12, 21, -116, 20, 0, }, /* 146 */
- { 20, 5, 12, 51, -32, 20, 0, }, /* 147 */
- { 20, 5, 12, 26, -775, 20, 0, }, /* 148 */
- { 20, 5, 12, 47, -32, 20, 0, }, /* 149 */
- { 20, 5, 12, 55, -32, 20, 0, }, /* 150 */
- { 20, 5, 12, 30, 1, 20, 0, }, /* 151 */
- { 20, 5, 12, 30, -32, 20, 0, }, /* 152 */
- { 20, 5, 12, 43, -32, 20, 0, }, /* 153 */
- { 20, 5, 12, 96, -32, 20, 0, }, /* 154 */
- { 20, 5, 12, 0, -64, 20, 0, }, /* 155 */
- { 20, 5, 12, 0, -63, 20, 0, }, /* 156 */
- { 20, 9, 12, 0, 8, 20, 0, }, /* 157 */
- { 20, 5, 12, 34, -30, 20, 0, }, /* 158 */
- { 20, 5, 12, 38, -25, 20, 0, }, /* 159 */
- { 20, 9, 12, 0, 0, 20, 0, }, /* 160 */
- { 20, 5, 12, 43, -15, 20, 0, }, /* 161 */
- { 20, 5, 12, 47, -22, 20, 0, }, /* 162 */
- { 20, 5, 12, 0, -8, 20, 0, }, /* 163 */
- { 11, 9, 12, 0, 1, 11, 0, }, /* 164 */
- { 11, 5, 12, 0, -1, 11, 0, }, /* 165 */
- { 20, 5, 12, 51, -54, 20, 0, }, /* 166 */
- { 20, 5, 12, 55, -48, 20, 0, }, /* 167 */
- { 20, 5, 12, 0, 7, 20, 0, }, /* 168 */
- { 20, 5, 12, 0, -116, 20, 0, }, /* 169 */
- { 20, 9, 12, 38, -60, 20, 0, }, /* 170 */
- { 20, 5, 12, 59, -64, 20, 0, }, /* 171 */
- { 20, 25, 12, 0, 0, 20, 0, }, /* 172 */
- { 20, 9, 12, 0, -7, 20, 0, }, /* 173 */
- { 20, 9, 12, 0, -130, 20, 0, }, /* 174 */
- { 13, 9, 12, 0, 80, 13, 0, }, /* 175 */
- { 13, 9, 12, 0, 32, 13, 0, }, /* 176 */
- { 13, 9, 12, 63, 32, 13, 0, }, /* 177 */
- { 13, 9, 12, 67, 32, 13, 0, }, /* 178 */
- { 13, 9, 12, 71, 32, 13, 0, }, /* 179 */
- { 13, 9, 12, 75, 32, 13, 0, }, /* 180 */
- { 13, 9, 12, 79, 32, 13, 0, }, /* 181 */
- { 13, 9, 12, 84, 32, 13, 0, }, /* 182 */
- { 13, 5, 12, 0, -32, 13, 0, }, /* 183 */
- { 13, 5, 12, 63, -32, 13, 0, }, /* 184 */
- { 13, 5, 12, 67, -32, 13, 0, }, /* 185 */
- { 13, 5, 12, 71, -32, 13, 0, }, /* 186 */
- { 13, 5, 12, 75, -32, 13, 0, }, /* 187 */
- { 13, 5, 12, 79, -32, 13, 0, }, /* 188 */
- { 13, 5, 12, 84, -32, 13, 0, }, /* 189 */
- { 13, 5, 12, 0, -80, 13, 0, }, /* 190 */
- { 13, 9, 12, 0, 1, 13, 0, }, /* 191 */
- { 13, 5, 12, 0, -1, 13, 0, }, /* 192 */
- { 13, 9, 12, 88, 1, 13, 0, }, /* 193 */
- { 13, 5, 12, 88, -1, 13, 0, }, /* 194 */
- { 13, 26, 12, 0, 0, 13, 0, }, /* 195 */
- { 13, 12, 3, 0, 0, -34, 0, }, /* 196 */
- { 13, 12, 3, 0, 0, -28, 0, }, /* 197 */
- { 28, 12, 3, 0, 0, -31, 0, }, /* 198 */
- { 13, 11, 3, 0, 0, 13, 0, }, /* 199 */
- { 13, 9, 12, 0, 15, 13, 0, }, /* 200 */
- { 13, 5, 12, 0, -15, 13, 0, }, /* 201 */
- { 2, 9, 12, 0, 48, 2, 0, }, /* 202 */
- { 2, 6, 12, 0, 0, 2, 0, }, /* 203 */
- { 2, 21, 12, 0, 0, 2, 0, }, /* 204 */
- { 2, 5, 12, 0, 0, 2, 0, }, /* 205 */
- { 2, 5, 12, 0, -48, 2, 0, }, /* 206 */
- { 10, 21, 12, 0, 0, -13, 0, }, /* 207 */
- { 2, 17, 12, 0, 0, 2, 0, }, /* 208 */
- { 2, 26, 12, 0, 0, 2, 0, }, /* 209 */
- { 2, 23, 12, 0, 0, 2, 0, }, /* 210 */
- { 26, 12, 3, 0, 0, 26, 0, }, /* 211 */
- { 26, 17, 12, 0, 0, 26, 0, }, /* 212 */
- { 26, 21, 12, 0, 0, 26, 0, }, /* 213 */
- { 26, 7, 12, 0, 0, 26, 0, }, /* 214 */
- { 1, 1, 4, 0, 0, 1, 0, }, /* 215 */
- { 10, 1, 4, 0, 0, 10, 0, }, /* 216 */
- { 1, 25, 12, 0, 0, 1, 0, }, /* 217 */
- { 1, 21, 12, 0, 0, 1, 0, }, /* 218 */
- { 1, 23, 12, 0, 0, 1, 0, }, /* 219 */
- { 10, 21, 12, 0, 0, -96, 0, }, /* 220 */
- { 1, 26, 12, 0, 0, 1, 0, }, /* 221 */
- { 1, 12, 3, 0, 0, 1, 0, }, /* 222 */
- { 1, 1, 2, 0, 0, -64, 0, }, /* 223 */
- { 1, 7, 12, 0, 0, 1, 0, }, /* 224 */
- { 10, 6, 12, 0, 0, -136, 0, }, /* 225 */
- { 28, 12, 3, 0, 0, -7, 0, }, /* 226 */
- { 1, 13, 12, 0, 0, -10, 0, }, /* 227 */
- { 1, 21, 12, 0, 0, -4, 0, }, /* 228 */
- { 1, 6, 12, 0, 0, 1, 0, }, /* 229 */
- { 1, 13, 12, 0, 0, 1, 0, }, /* 230 */
- { 50, 21, 12, 0, 0, 50, 0, }, /* 231 */
- { 50, 1, 4, 0, 0, 50, 0, }, /* 232 */
- { 50, 7, 12, 0, 0, 50, 0, }, /* 233 */
- { 50, 12, 3, 0, 0, 50, 0, }, /* 234 */
- { 56, 7, 12, 0, 0, 56, 0, }, /* 235 */
- { 56, 12, 3, 0, 0, 56, 0, }, /* 236 */
- { 64, 13, 12, 0, 0, 64, 0, }, /* 237 */
- { 64, 7, 12, 0, 0, 64, 0, }, /* 238 */
- { 64, 12, 3, 0, 0, 64, 0, }, /* 239 */
- { 64, 6, 12, 0, 0, 64, 0, }, /* 240 */
- { 64, 26, 12, 0, 0, 64, 0, }, /* 241 */
- { 64, 21, 12, 0, 0, 64, 0, }, /* 242 */
- { 64, 23, 12, 0, 0, 64, 0, }, /* 243 */
- { 90, 7, 12, 0, 0, 90, 0, }, /* 244 */
- { 90, 12, 3, 0, 0, 90, 0, }, /* 245 */
- { 90, 6, 12, 0, 0, 90, 0, }, /* 246 */
- { 90, 21, 12, 0, 0, 90, 0, }, /* 247 */
- { 95, 7, 12, 0, 0, 95, 0, }, /* 248 */
- { 95, 12, 3, 0, 0, 95, 0, }, /* 249 */
- { 95, 21, 12, 0, 0, 95, 0, }, /* 250 */
- { 15, 12, 3, 0, 0, 15, 0, }, /* 251 */
- { 15, 10, 5, 0, 0, 15, 0, }, /* 252 */
- { 15, 7, 12, 0, 0, 15, 0, }, /* 253 */
- { 28, 12, 3, 0, 0, -183, 0, }, /* 254 */
- { 28, 12, 3, 0, 0, -157, 0, }, /* 255 */
- { 10, 21, 12, 0, 0, -211, 0, }, /* 256 */
- { 10, 21, 12, 0, 0, -230, 0, }, /* 257 */
- { 15, 13, 12, 0, 0, -111, 0, }, /* 258 */
- { 15, 21, 12, 0, 0, 15, 0, }, /* 259 */
- { 15, 6, 12, 0, 0, 15, 0, }, /* 260 */
- { 3, 7, 12, 0, 0, 3, 0, }, /* 261 */
- { 3, 12, 3, 0, 0, 3, 0, }, /* 262 */
- { 3, 10, 5, 0, 0, 3, 0, }, /* 263 */
- { 3, 10, 3, 0, 0, 3, 0, }, /* 264 */
- { 3, 13, 12, 0, 0, -68, 0, }, /* 265 */
- { 3, 23, 12, 0, 0, 3, 0, }, /* 266 */
- { 3, 15, 12, 0, 0, 3, 0, }, /* 267 */
- { 3, 26, 12, 0, 0, 3, 0, }, /* 268 */
- { 3, 21, 12, 0, 0, 3, 0, }, /* 269 */
- { 22, 12, 3, 0, 0, 22, 0, }, /* 270 */
- { 22, 10, 5, 0, 0, 22, 0, }, /* 271 */
- { 22, 7, 12, 0, 0, 22, 0, }, /* 272 */
- { 22, 13, 12, 0, 0, -55, 0, }, /* 273 */
- { 22, 21, 12, 0, 0, 22, 0, }, /* 274 */
- { 21, 12, 3, 0, 0, 21, 0, }, /* 275 */
- { 21, 10, 5, 0, 0, 21, 0, }, /* 276 */
- { 21, 7, 12, 0, 0, 21, 0, }, /* 277 */
- { 21, 13, 12, 0, 0, -52, 0, }, /* 278 */
- { 21, 21, 12, 0, 0, 21, 0, }, /* 279 */
- { 21, 23, 12, 0, 0, 21, 0, }, /* 280 */
- { 44, 12, 3, 0, 0, 44, 0, }, /* 281 */
- { 44, 10, 5, 0, 0, 44, 0, }, /* 282 */
- { 44, 7, 12, 0, 0, 44, 0, }, /* 283 */
- { 44, 10, 3, 0, 0, 44, 0, }, /* 284 */
- { 44, 13, 12, 0, 0, 44, 0, }, /* 285 */
- { 44, 26, 12, 0, 0, 44, 0, }, /* 286 */
- { 44, 15, 12, 0, 0, 44, 0, }, /* 287 */
- { 54, 12, 3, 0, 0, 54, 0, }, /* 288 */
- { 54, 7, 12, 0, 0, 54, 0, }, /* 289 */
- { 54, 10, 3, 0, 0, 54, 0, }, /* 290 */
- { 54, 10, 5, 0, 0, 54, 0, }, /* 291 */
- { 54, 13, 12, 0, 0, -49, 0, }, /* 292 */
- { 54, 15, 12, 0, 0, -49, 0, }, /* 293 */
- { 54, 26, 12, 0, 0, -49, 0, }, /* 294 */
- { 54, 26, 12, 0, 0, 54, 0, }, /* 295 */
- { 54, 23, 12, 0, 0, 54, 0, }, /* 296 */
- { 55, 12, 3, 0, 0, 55, 0, }, /* 297 */
- { 55, 10, 5, 0, 0, 55, 0, }, /* 298 */
- { 55, 7, 12, 0, 0, 55, 0, }, /* 299 */
- { 55, 13, 12, 0, 0, 55, 0, }, /* 300 */
- { 55, 15, 12, 0, 0, 55, 0, }, /* 301 */
- { 55, 26, 12, 0, 0, 55, 0, }, /* 302 */
- { 29, 7, 12, 0, 0, 29, 0, }, /* 303 */
- { 29, 12, 3, 0, 0, 29, 0, }, /* 304 */
- { 29, 10, 5, 0, 0, 29, 0, }, /* 305 */
- { 29, 21, 12, 0, 0, 29, 0, }, /* 306 */
- { 29, 10, 3, 0, 0, 29, 0, }, /* 307 */
- { 29, 13, 12, 0, 0, 29, 0, }, /* 308 */
- { 37, 12, 3, 0, 0, 37, 0, }, /* 309 */
- { 37, 10, 5, 0, 0, 37, 0, }, /* 310 */
- { 37, 7, 12, 0, 0, 37, 0, }, /* 311 */
- { 37, 10, 3, 0, 0, 37, 0, }, /* 312 */
- { 37, 7, 4, 0, 0, 37, 0, }, /* 313 */
- { 37, 26, 12, 0, 0, 37, 0, }, /* 314 */
- { 37, 15, 12, 0, 0, 37, 0, }, /* 315 */
- { 37, 13, 12, 0, 0, 37, 0, }, /* 316 */
- { 48, 10, 5, 0, 0, 48, 0, }, /* 317 */
- { 48, 7, 12, 0, 0, 48, 0, }, /* 318 */
- { 48, 12, 3, 0, 0, 48, 0, }, /* 319 */
- { 48, 10, 3, 0, 0, 48, 0, }, /* 320 */
- { 48, 13, 12, 0, 0, 48, 0, }, /* 321 */
- { 48, 21, 12, 0, 0, 48, 0, }, /* 322 */
- { 57, 7, 12, 0, 0, 57, 0, }, /* 323 */
- { 57, 12, 3, 0, 0, 57, 0, }, /* 324 */
- { 57, 7, 5, 0, 0, 57, 0, }, /* 325 */
- { 57, 6, 12, 0, 0, 57, 0, }, /* 326 */
- { 57, 21, 12, 0, 0, 57, 0, }, /* 327 */
- { 57, 13, 12, 0, 0, 57, 0, }, /* 328 */
- { 33, 7, 12, 0, 0, 33, 0, }, /* 329 */
- { 33, 12, 3, 0, 0, 33, 0, }, /* 330 */
- { 33, 7, 5, 0, 0, 33, 0, }, /* 331 */
- { 33, 6, 12, 0, 0, 33, 0, }, /* 332 */
- { 33, 13, 12, 0, 0, 33, 0, }, /* 333 */
- { 58, 7, 12, 0, 0, 58, 0, }, /* 334 */
- { 58, 26, 12, 0, 0, 58, 0, }, /* 335 */
- { 58, 21, 12, 0, 0, 58, 0, }, /* 336 */
- { 58, 12, 3, 0, 0, 58, 0, }, /* 337 */
- { 58, 13, 12, 0, 0, 58, 0, }, /* 338 */
- { 58, 15, 12, 0, 0, 58, 0, }, /* 339 */
- { 58, 22, 12, 0, 0, 58, 0, }, /* 340 */
- { 58, 18, 12, 0, 0, 58, 0, }, /* 341 */
- { 58, 10, 5, 0, 0, 58, 0, }, /* 342 */
- { 39, 7, 12, 0, 0, 39, 0, }, /* 343 */
- { 39, 10, 12, 0, 0, 39, 0, }, /* 344 */
- { 39, 12, 3, 0, 0, 39, 0, }, /* 345 */
- { 39, 10, 5, 0, 0, 39, 0, }, /* 346 */
- { 39, 13, 12, 0, 0, -72, 0, }, /* 347 */
- { 39, 21, 12, 0, 0, 39, 0, }, /* 348 */
- { 39, 13, 12, 0, 0, 39, 0, }, /* 349 */
- { 39, 26, 12, 0, 0, 39, 0, }, /* 350 */
- { 17, 9, 12, 0, 7264, 17, 0, }, /* 351 */
- { 17, 5, 12, 0, 3008, 17, 0, }, /* 352 */
- { 10, 21, 12, 0, 0, -46, 0, }, /* 353 */
- { 17, 6, 12, 0, 0, 17, 0, }, /* 354 */
- { 24, 7, 6, 0, 0, 24, 0, }, /* 355 */
- { 24, 7, 7, 0, 0, 24, 0, }, /* 356 */
- { 24, 7, 8, 0, 0, 24, 0, }, /* 357 */
- { 16, 7, 12, 0, 0, 16, 0, }, /* 358 */
- { 16, 12, 3, 0, 0, 16, 0, }, /* 359 */
- { 16, 21, 12, 0, 0, 16, 0, }, /* 360 */
- { 16, 15, 12, 0, 0, 16, 0, }, /* 361 */
- { 16, 26, 12, 0, 0, 16, 0, }, /* 362 */
- { 9, 9, 12, 0, 38864, 9, 0, }, /* 363 */
- { 9, 9, 12, 0, 8, 9, 0, }, /* 364 */
- { 9, 5, 12, 0, -8, 9, 0, }, /* 365 */
- { 8, 17, 12, 0, 0, 8, 0, }, /* 366 */
- { 8, 7, 12, 0, 0, 8, 0, }, /* 367 */
- { 8, 21, 12, 0, 0, 8, 0, }, /* 368 */
- { 41, 29, 12, 0, 0, 41, 0, }, /* 369 */
- { 41, 7, 12, 0, 0, 41, 0, }, /* 370 */
- { 41, 22, 12, 0, 0, 41, 0, }, /* 371 */
- { 41, 18, 12, 0, 0, 41, 0, }, /* 372 */
- { 46, 7, 12, 0, 0, 46, 0, }, /* 373 */
- { 46, 14, 12, 0, 0, 46, 0, }, /* 374 */
- { 51, 7, 12, 0, 0, 51, 0, }, /* 375 */
- { 51, 12, 3, 0, 0, 51, 0, }, /* 376 */
- { 25, 7, 12, 0, 0, 25, 0, }, /* 377 */
- { 25, 12, 3, 0, 0, 25, 0, }, /* 378 */
- { 10, 21, 12, 0, 0, -106, 0, }, /* 379 */
- { 7, 7, 12, 0, 0, 7, 0, }, /* 380 */
- { 7, 12, 3, 0, 0, 7, 0, }, /* 381 */
- { 52, 7, 12, 0, 0, 52, 0, }, /* 382 */
- { 52, 12, 3, 0, 0, 52, 0, }, /* 383 */
- { 32, 7, 12, 0, 0, 32, 0, }, /* 384 */
- { 32, 12, 3, 0, 0, 32, 0, }, /* 385 */
- { 32, 10, 5, 0, 0, 32, 0, }, /* 386 */
- { 32, 21, 12, 0, 0, 32, 0, }, /* 387 */
- { 32, 6, 12, 0, 0, 32, 0, }, /* 388 */
- { 32, 23, 12, 0, 0, 32, 0, }, /* 389 */
- { 32, 13, 12, 0, 0, 32, 0, }, /* 390 */
- { 32, 15, 12, 0, 0, 32, 0, }, /* 391 */
- { 38, 21, 12, 0, 0, 38, 0, }, /* 392 */
- { 10, 21, 12, 0, 0, -61, 0, }, /* 393 */
- { 38, 17, 12, 0, 0, 38, 0, }, /* 394 */
- { 38, 12, 3, 0, 0, 38, 0, }, /* 395 */
- { 38, 1, 2, 0, 0, 38, 0, }, /* 396 */
- { 38, 13, 12, 0, 0, 38, 0, }, /* 397 */
- { 38, 7, 12, 0, 0, 38, 0, }, /* 398 */
- { 38, 6, 12, 0, 0, 38, 0, }, /* 399 */
- { 35, 7, 12, 0, 0, 35, 0, }, /* 400 */
- { 35, 12, 3, 0, 0, 35, 0, }, /* 401 */
- { 35, 10, 5, 0, 0, 35, 0, }, /* 402 */
- { 35, 26, 12, 0, 0, 35, 0, }, /* 403 */
- { 35, 21, 12, 0, 0, 35, 0, }, /* 404 */
- { 35, 13, 12, 0, 0, 35, 0, }, /* 405 */
- { 53, 7, 12, 0, 0, 53, 0, }, /* 406 */
- { 40, 7, 12, 0, 0, 40, 0, }, /* 407 */
- { 40, 13, 12, 0, 0, 40, 0, }, /* 408 */
- { 40, 15, 12, 0, 0, 40, 0, }, /* 409 */
- { 40, 26, 12, 0, 0, 40, 0, }, /* 410 */
- { 32, 26, 12, 0, 0, 32, 0, }, /* 411 */
- { 6, 7, 12, 0, 0, 6, 0, }, /* 412 */
- { 6, 12, 3, 0, 0, 6, 0, }, /* 413 */
- { 6, 10, 5, 0, 0, 6, 0, }, /* 414 */
- { 6, 21, 12, 0, 0, 6, 0, }, /* 415 */
- { 91, 7, 12, 0, 0, 91, 0, }, /* 416 */
- { 91, 10, 5, 0, 0, 91, 0, }, /* 417 */
- { 91, 12, 3, 0, 0, 91, 0, }, /* 418 */
- { 91, 10, 12, 0, 0, 91, 0, }, /* 419 */
- { 91, 13, 12, 0, 0, 91, 0, }, /* 420 */
- { 91, 21, 12, 0, 0, 91, 0, }, /* 421 */
- { 91, 6, 12, 0, 0, 91, 0, }, /* 422 */
- { 28, 11, 3, 0, 0, 28, 0, }, /* 423 */
- { 62, 12, 3, 0, 0, 62, 0, }, /* 424 */
- { 62, 10, 5, 0, 0, 62, 0, }, /* 425 */
- { 62, 7, 12, 0, 0, 62, 0, }, /* 426 */
- { 62, 13, 12, 0, 0, 62, 0, }, /* 427 */
- { 62, 21, 12, 0, 0, 62, 0, }, /* 428 */
- { 62, 26, 12, 0, 0, 62, 0, }, /* 429 */
- { 76, 12, 3, 0, 0, 76, 0, }, /* 430 */
- { 76, 10, 5, 0, 0, 76, 0, }, /* 431 */
- { 76, 7, 12, 0, 0, 76, 0, }, /* 432 */
- { 76, 13, 12, 0, 0, 76, 0, }, /* 433 */
- { 93, 7, 12, 0, 0, 93, 0, }, /* 434 */
- { 93, 12, 3, 0, 0, 93, 0, }, /* 435 */
- { 93, 10, 5, 0, 0, 93, 0, }, /* 436 */
- { 93, 21, 12, 0, 0, 93, 0, }, /* 437 */
- { 70, 7, 12, 0, 0, 70, 0, }, /* 438 */
- { 70, 10, 5, 0, 0, 70, 0, }, /* 439 */
- { 70, 12, 3, 0, 0, 70, 0, }, /* 440 */
- { 70, 21, 12, 0, 0, 70, 0, }, /* 441 */
- { 70, 13, 12, 0, 0, 70, 0, }, /* 442 */
- { 73, 13, 12, 0, 0, 73, 0, }, /* 443 */
- { 73, 7, 12, 0, 0, 73, 0, }, /* 444 */
- { 73, 6, 12, 0, 0, 73, 0, }, /* 445 */
- { 73, 21, 12, 0, 0, 73, 0, }, /* 446 */
- { 13, 5, 12, 63, -6222, 13, 0, }, /* 447 */
- { 13, 5, 12, 67, -6221, 13, 0, }, /* 448 */
- { 13, 5, 12, 71, -6212, 13, 0, }, /* 449 */
- { 13, 5, 12, 75, -6210, 13, 0, }, /* 450 */
- { 13, 5, 12, 79, -6210, 13, 0, }, /* 451 */
- { 13, 5, 12, 79, -6211, 13, 0, }, /* 452 */
- { 13, 5, 12, 84, -6204, 13, 0, }, /* 453 */
- { 13, 5, 12, 88, -6180, 13, 0, }, /* 454 */
- { 13, 5, 12, 108, 35267, 13, 0, }, /* 455 */
- { 17, 9, 12, 0, -3008, 17, 0, }, /* 456 */
- { 76, 21, 12, 0, 0, 76, 0, }, /* 457 */
- { 28, 12, 3, 0, 0, -101, 0, }, /* 458 */
- { 28, 12, 3, 0, 0, 15, 0, }, /* 459 */
- { 10, 21, 12, 0, 0, -37, 0, }, /* 460 */
- { 28, 12, 3, 0, 0, -16, 0, }, /* 461 */
- { 28, 12, 3, 0, 0, -40, 0, }, /* 462 */
- { 28, 12, 3, 0, 0, -129, 0, }, /* 463 */
- { 10, 10, 5, 0, 0, -16, 0, }, /* 464 */
- { 10, 7, 12, 0, 0, 15, 0, }, /* 465 */
- { 10, 7, 12, 0, 0, -16, 0, }, /* 466 */
- { 10, 10, 5, 0, 0, -37, 0, }, /* 467 */
- { 28, 12, 3, 0, 0, -80, 0, }, /* 468 */
- { 10, 10, 5, 0, 0, 3, 0, }, /* 469 */
- { 28, 12, 3, 0, 0, -37, 0, }, /* 470 */
- { 13, 5, 12, 0, 0, 13, 0, }, /* 471 */
- { 13, 6, 12, 0, 0, 13, 0, }, /* 472 */
- { 34, 5, 12, 0, 35332, 34, 0, }, /* 473 */
- { 34, 5, 12, 0, 3814, 34, 0, }, /* 474 */
- { 34, 9, 12, 92, 1, 34, 0, }, /* 475 */
- { 34, 5, 12, 92, -1, 34, 0, }, /* 476 */
- { 34, 5, 12, 92, -58, 34, 0, }, /* 477 */
- { 34, 9, 12, 0, -7615, 34, 0, }, /* 478 */
- { 20, 5, 12, 0, 8, 20, 0, }, /* 479 */
- { 20, 9, 12, 0, -8, 20, 0, }, /* 480 */
- { 20, 5, 12, 0, 74, 20, 0, }, /* 481 */
- { 20, 5, 12, 0, 86, 20, 0, }, /* 482 */
- { 20, 5, 12, 0, 100, 20, 0, }, /* 483 */
- { 20, 5, 12, 0, 128, 20, 0, }, /* 484 */
- { 20, 5, 12, 0, 112, 20, 0, }, /* 485 */
- { 20, 5, 12, 0, 126, 20, 0, }, /* 486 */
- { 20, 8, 12, 0, -8, 20, 0, }, /* 487 */
- { 20, 5, 12, 0, 9, 20, 0, }, /* 488 */
- { 20, 9, 12, 0, -74, 20, 0, }, /* 489 */
- { 20, 8, 12, 0, -9, 20, 0, }, /* 490 */
- { 20, 5, 12, 21, -7173, 20, 0, }, /* 491 */
- { 20, 9, 12, 0, -86, 20, 0, }, /* 492 */
- { 20, 9, 12, 0, -100, 20, 0, }, /* 493 */
- { 20, 9, 12, 0, -112, 20, 0, }, /* 494 */
- { 20, 9, 12, 0, -128, 20, 0, }, /* 495 */
- { 20, 9, 12, 0, -126, 20, 0, }, /* 496 */
- { 28, 1, 3, 0, 0, 28, 0, }, /* 497 */
- { 28, 1, 13, 0, 0, 28, 0, }, /* 498 */
- { 10, 27, 2, 0, 0, 10, 0, }, /* 499 */
- { 10, 28, 2, 0, 0, 10, 0, }, /* 500 */
- { 10, 21, 14, 0, 0, 10, 0, }, /* 501 */
- { 0, 2, 2, 0, 0, 0, 0, }, /* 502 */
- { 28, 12, 3, 0, 0, -84, 0, }, /* 503 */
- { 10, 9, 12, 0, 0, 10, 0, }, /* 504 */
- { 10, 5, 12, 0, 0, 10, 0, }, /* 505 */
- { 20, 9, 12, 96, -7517, 20, 0, }, /* 506 */
- { 34, 9, 12, 100, -8383, 34, 0, }, /* 507 */
- { 34, 9, 12, 104, -8262, 34, 0, }, /* 508 */
- { 34, 9, 12, 0, 28, 34, 0, }, /* 509 */
- { 10, 7, 12, 0, 0, 10, 0, }, /* 510 */
- { 10, 5, 14, 0, 0, 10, 0, }, /* 511 */
- { 34, 5, 12, 0, -28, 34, 0, }, /* 512 */
- { 34, 14, 12, 0, 16, 34, 0, }, /* 513 */
- { 34, 14, 12, 0, -16, 34, 0, }, /* 514 */
- { 34, 14, 12, 0, 0, 34, 0, }, /* 515 */
- { 10, 25, 14, 0, 0, 10, 0, }, /* 516 */
- { 10, 26, 12, 0, 26, 10, 0, }, /* 517 */
- { 10, 26, 14, 0, 26, 10, 0, }, /* 518 */
- { 10, 26, 12, 0, -26, 10, 0, }, /* 519 */
- { 5, 26, 12, 0, 0, 5, 0, }, /* 520 */
- { 18, 9, 12, 0, 48, 18, 0, }, /* 521 */
- { 18, 5, 12, 0, -48, 18, 0, }, /* 522 */
- { 34, 9, 12, 0, -10743, 34, 0, }, /* 523 */
- { 34, 9, 12, 0, -3814, 34, 0, }, /* 524 */
- { 34, 9, 12, 0, -10727, 34, 0, }, /* 525 */
- { 34, 5, 12, 0, -10795, 34, 0, }, /* 526 */
- { 34, 5, 12, 0, -10792, 34, 0, }, /* 527 */
- { 34, 9, 12, 0, -10780, 34, 0, }, /* 528 */
- { 34, 9, 12, 0, -10749, 34, 0, }, /* 529 */
- { 34, 9, 12, 0, -10783, 34, 0, }, /* 530 */
- { 34, 9, 12, 0, -10782, 34, 0, }, /* 531 */
- { 34, 9, 12, 0, -10815, 34, 0, }, /* 532 */
- { 11, 5, 12, 0, 0, 11, 0, }, /* 533 */
- { 11, 26, 12, 0, 0, 11, 0, }, /* 534 */
- { 11, 12, 3, 0, 0, 11, 0, }, /* 535 */
- { 11, 21, 12, 0, 0, 11, 0, }, /* 536 */
- { 11, 15, 12, 0, 0, 11, 0, }, /* 537 */
- { 17, 5, 12, 0, -7264, 17, 0, }, /* 538 */
- { 59, 7, 12, 0, 0, 59, 0, }, /* 539 */
- { 59, 6, 12, 0, 0, 59, 0, }, /* 540 */
- { 59, 21, 12, 0, 0, 59, 0, }, /* 541 */
- { 59, 12, 3, 0, 0, 59, 0, }, /* 542 */
- { 13, 12, 3, 0, 0, 13, 0, }, /* 543 */
- { 10, 21, 12, 0, 0, -28, 0, }, /* 544 */
- { 23, 26, 12, 0, 0, 23, 0, }, /* 545 */
- { 10, 21, 12, 0, 0, -122, 0, }, /* 546 */
- { 10, 21, 12, 0, 0, -116, 0, }, /* 547 */
- { 23, 6, 12, 0, 0, 23, 0, }, /* 548 */
- { 10, 7, 12, 0, 0, 23, 0, }, /* 549 */
- { 23, 14, 12, 0, 0, 23, 0, }, /* 550 */
- { 10, 22, 12, 0, 0, -122, 0, }, /* 551 */
- { 10, 18, 12, 0, 0, -122, 0, }, /* 552 */
- { 10, 26, 12, 0, 0, -116, 0, }, /* 553 */
- { 10, 17, 12, 0, 0, -116, 0, }, /* 554 */
- { 10, 22, 12, 0, 0, -116, 0, }, /* 555 */
- { 10, 18, 12, 0, 0, -116, 0, }, /* 556 */
- { 28, 12, 3, 0, 0, -19, 0, }, /* 557 */
- { 24, 10, 3, 0, 0, 24, 0, }, /* 558 */
- { 10, 17, 14, 0, 0, -116, 0, }, /* 559 */
- { 10, 6, 12, 0, 0, -58, 0, }, /* 560 */
- { 10, 7, 12, 0, 0, -88, 0, }, /* 561 */
- { 10, 21, 14, 0, 0, -88, 0, }, /* 562 */
- { 10, 26, 12, 0, 0, 23, 0, }, /* 563 */
- { 27, 7, 12, 0, 0, 27, 0, }, /* 564 */
- { 28, 12, 3, 0, 0, -58, 0, }, /* 565 */
- { 10, 24, 12, 0, 0, -58, 0, }, /* 566 */
- { 27, 6, 12, 0, 0, 27, 0, }, /* 567 */
- { 10, 17, 12, 0, 0, -58, 0, }, /* 568 */
- { 30, 7, 12, 0, 0, 30, 0, }, /* 569 */
- { 30, 6, 12, 0, 0, 30, 0, }, /* 570 */
- { 4, 7, 12, 0, 0, 4, 0, }, /* 571 */
- { 24, 7, 12, 0, 0, 24, 0, }, /* 572 */
- { 10, 15, 12, 0, 0, 23, 0, }, /* 573 */
- { 24, 26, 12, 0, 0, 24, 0, }, /* 574 */
- { 10, 26, 14, 0, 0, 23, 0, }, /* 575 */
- { 30, 26, 12, 0, 0, 30, 0, }, /* 576 */
- { 23, 7, 12, 0, 0, 23, 0, }, /* 577 */
- { 61, 7, 12, 0, 0, 61, 0, }, /* 578 */
- { 61, 6, 12, 0, 0, 61, 0, }, /* 579 */
- { 61, 26, 12, 0, 0, 61, 0, }, /* 580 */
- { 86, 7, 12, 0, 0, 86, 0, }, /* 581 */
- { 86, 6, 12, 0, 0, 86, 0, }, /* 582 */
- { 86, 21, 12, 0, 0, 86, 0, }, /* 583 */
- { 77, 7, 12, 0, 0, 77, 0, }, /* 584 */
- { 77, 6, 12, 0, 0, 77, 0, }, /* 585 */
- { 77, 21, 12, 0, 0, 77, 0, }, /* 586 */
- { 77, 13, 12, 0, 0, 77, 0, }, /* 587 */
- { 13, 9, 12, 108, 1, 13, 0, }, /* 588 */
- { 13, 5, 12, 108, -35267, 13, 0, }, /* 589 */
- { 13, 7, 12, 0, 0, 13, 0, }, /* 590 */
- { 13, 21, 12, 0, 0, 13, 0, }, /* 591 */
- { 79, 7, 12, 0, 0, 79, 0, }, /* 592 */
- { 79, 14, 12, 0, 0, 79, 0, }, /* 593 */
- { 79, 12, 3, 0, 0, 79, 0, }, /* 594 */
- { 79, 21, 12, 0, 0, 79, 0, }, /* 595 */
- { 34, 9, 12, 0, -35332, 34, 0, }, /* 596 */
- { 34, 9, 12, 0, -42280, 34, 0, }, /* 597 */
- { 34, 9, 12, 0, -42308, 34, 0, }, /* 598 */
- { 34, 9, 12, 0, -42319, 34, 0, }, /* 599 */
- { 34, 9, 12, 0, -42315, 34, 0, }, /* 600 */
- { 34, 9, 12, 0, -42305, 34, 0, }, /* 601 */
- { 34, 9, 12, 0, -42258, 34, 0, }, /* 602 */
- { 34, 9, 12, 0, -42282, 34, 0, }, /* 603 */
- { 34, 9, 12, 0, -42261, 34, 0, }, /* 604 */
- { 34, 9, 12, 0, 928, 34, 0, }, /* 605 */
- { 49, 7, 12, 0, 0, 49, 0, }, /* 606 */
- { 49, 12, 3, 0, 0, 49, 0, }, /* 607 */
- { 49, 10, 5, 0, 0, 49, 0, }, /* 608 */
- { 49, 26, 12, 0, 0, 49, 0, }, /* 609 */
- { 10, 15, 12, 0, 0, -197, 0, }, /* 610 */
- { 10, 15, 12, 0, 0, -170, 0, }, /* 611 */
- { 10, 26, 12, 0, 0, -145, 0, }, /* 612 */
- { 10, 23, 12, 0, 0, -145, 0, }, /* 613 */
- { 65, 7, 12, 0, 0, 65, 0, }, /* 614 */
- { 65, 21, 12, 0, 0, 65, 0, }, /* 615 */
- { 75, 10, 5, 0, 0, 75, 0, }, /* 616 */
- { 75, 7, 12, 0, 0, 75, 0, }, /* 617 */
- { 75, 12, 3, 0, 0, 75, 0, }, /* 618 */
- { 75, 21, 12, 0, 0, 75, 0, }, /* 619 */
- { 75, 13, 12, 0, 0, 75, 0, }, /* 620 */
- { 15, 12, 3, 0, 0, -16, 0, }, /* 621 */
- { 15, 7, 12, 0, 0, -43, 0, }, /* 622 */
- { 69, 13, 12, 0, 0, 69, 0, }, /* 623 */
- { 69, 7, 12, 0, 0, 69, 0, }, /* 624 */
- { 69, 12, 3, 0, 0, 69, 0, }, /* 625 */
- { 10, 21, 12, 0, 0, -92, 0, }, /* 626 */
- { 69, 21, 12, 0, 0, 69, 0, }, /* 627 */
- { 74, 7, 12, 0, 0, 74, 0, }, /* 628 */
- { 74, 12, 3, 0, 0, 74, 0, }, /* 629 */
- { 74, 10, 5, 0, 0, 74, 0, }, /* 630 */
- { 74, 21, 12, 0, 0, 74, 0, }, /* 631 */
- { 84, 12, 3, 0, 0, 84, 0, }, /* 632 */
- { 84, 10, 5, 0, 0, 84, 0, }, /* 633 */
- { 84, 7, 12, 0, 0, 84, 0, }, /* 634 */
- { 84, 21, 12, 0, 0, 84, 0, }, /* 635 */
- { 10, 6, 12, 0, 0, -22, 0, }, /* 636 */
- { 84, 13, 12, 0, 0, 84, 0, }, /* 637 */
- { 39, 6, 12, 0, 0, 39, 0, }, /* 638 */
- { 68, 7, 12, 0, 0, 68, 0, }, /* 639 */
- { 68, 12, 3, 0, 0, 68, 0, }, /* 640 */
- { 68, 10, 5, 0, 0, 68, 0, }, /* 641 */
- { 68, 13, 12, 0, 0, 68, 0, }, /* 642 */
- { 68, 21, 12, 0, 0, 68, 0, }, /* 643 */
- { 92, 7, 12, 0, 0, 92, 0, }, /* 644 */
- { 92, 12, 3, 0, 0, 92, 0, }, /* 645 */
- { 92, 6, 12, 0, 0, 92, 0, }, /* 646 */
- { 92, 21, 12, 0, 0, 92, 0, }, /* 647 */
- { 87, 7, 12, 0, 0, 87, 0, }, /* 648 */
- { 87, 10, 5, 0, 0, 87, 0, }, /* 649 */
- { 87, 12, 3, 0, 0, 87, 0, }, /* 650 */
- { 87, 21, 12, 0, 0, 87, 0, }, /* 651 */
- { 87, 6, 12, 0, 0, 87, 0, }, /* 652 */
- { 34, 5, 12, 0, -928, 34, 0, }, /* 653 */
- { 9, 5, 12, 0, -38864, 9, 0, }, /* 654 */
- { 87, 13, 12, 0, 0, 87, 0, }, /* 655 */
- { 24, 7, 9, 0, 0, 24, 0, }, /* 656 */
- { 24, 7, 10, 0, 0, 24, 0, }, /* 657 */
- { 0, 4, 2, 0, 0, 0, 0, }, /* 658 */
- { 0, 3, 12, 0, 0, 0, 0, }, /* 659 */
- { 26, 25, 12, 0, 0, 26, 0, }, /* 660 */
- { 1, 24, 12, 0, 0, 1, 0, }, /* 661 */
- { 1, 7, 12, 0, 0, -10, 0, }, /* 662 */
- { 1, 26, 12, 0, 0, -10, 0, }, /* 663 */
- { 10, 6, 3, 0, 0, -58, 0, }, /* 664 */
- { 36, 7, 12, 0, 0, 36, 0, }, /* 665 */
- { 10, 21, 12, 0, 0, -25, 0, }, /* 666 */
- { 10, 15, 12, 0, 0, -76, 0, }, /* 667 */
- { 10, 26, 12, 0, 0, -25, 0, }, /* 668 */
- { 20, 14, 12, 0, 0, 20, 0, }, /* 669 */
- { 20, 15, 12, 0, 0, 20, 0, }, /* 670 */
- { 20, 26, 12, 0, 0, 20, 0, }, /* 671 */
- { 71, 7, 12, 0, 0, 71, 0, }, /* 672 */
- { 67, 7, 12, 0, 0, 67, 0, }, /* 673 */
- { 28, 12, 3, 0, 0, -1, 0, }, /* 674 */
- { 10, 15, 12, 0, 0, -1, 0, }, /* 675 */
- { 42, 7, 12, 0, 0, 42, 0, }, /* 676 */
- { 42, 15, 12, 0, 0, 42, 0, }, /* 677 */
- { 19, 7, 12, 0, 0, 19, 0, }, /* 678 */
- { 19, 14, 12, 0, 0, 19, 0, }, /* 679 */
- { 118, 7, 12, 0, 0, 118, 0, }, /* 680 */
- { 118, 12, 3, 0, 0, 118, 0, }, /* 681 */
- { 60, 7, 12, 0, 0, 60, 0, }, /* 682 */
- { 60, 21, 12, 0, 0, 60, 0, }, /* 683 */
- { 43, 7, 12, 0, 0, 43, 0, }, /* 684 */
- { 43, 21, 12, 0, 0, 43, 0, }, /* 685 */
- { 43, 14, 12, 0, 0, 43, 0, }, /* 686 */
- { 14, 9, 12, 0, 40, 14, 0, }, /* 687 */
- { 14, 5, 12, 0, -40, 14, 0, }, /* 688 */
- { 47, 7, 12, 0, 0, 47, 0, }, /* 689 */
- { 45, 7, 12, 0, 0, 45, 0, }, /* 690 */
- { 45, 13, 12, 0, 0, 45, 0, }, /* 691 */
- { 136, 9, 12, 0, 40, 136, 0, }, /* 692 */
- { 136, 5, 12, 0, -40, 136, 0, }, /* 693 */
- { 106, 7, 12, 0, 0, 106, 0, }, /* 694 */
- { 104, 7, 12, 0, 0, 104, 0, }, /* 695 */
- { 104, 21, 12, 0, 0, 104, 0, }, /* 696 */
- { 110, 7, 12, 0, 0, 110, 0, }, /* 697 */
- { 12, 7, 12, 0, 0, 12, 0, }, /* 698 */
- { 81, 7, 12, 0, 0, 81, 0, }, /* 699 */
- { 81, 21, 12, 0, 0, 81, 0, }, /* 700 */
- { 81, 15, 12, 0, 0, 81, 0, }, /* 701 */
- { 120, 7, 12, 0, 0, 120, 0, }, /* 702 */
- { 120, 26, 12, 0, 0, 120, 0, }, /* 703 */
- { 120, 15, 12, 0, 0, 120, 0, }, /* 704 */
- { 116, 7, 12, 0, 0, 116, 0, }, /* 705 */
- { 116, 15, 12, 0, 0, 116, 0, }, /* 706 */
- { 128, 7, 12, 0, 0, 128, 0, }, /* 707 */
- { 128, 15, 12, 0, 0, 128, 0, }, /* 708 */
- { 66, 7, 12, 0, 0, 66, 0, }, /* 709 */
- { 66, 15, 12, 0, 0, 66, 0, }, /* 710 */
- { 66, 21, 12, 0, 0, 66, 0, }, /* 711 */
- { 72, 7, 12, 0, 0, 72, 0, }, /* 712 */
- { 72, 21, 12, 0, 0, 72, 0, }, /* 713 */
- { 98, 7, 12, 0, 0, 98, 0, }, /* 714 */
- { 97, 7, 12, 0, 0, 97, 0, }, /* 715 */
- { 97, 15, 12, 0, 0, 97, 0, }, /* 716 */
- { 31, 7, 12, 0, 0, 31, 0, }, /* 717 */
- { 31, 12, 3, 0, 0, 31, 0, }, /* 718 */
- { 31, 15, 12, 0, 0, 31, 0, }, /* 719 */
- { 31, 21, 12, 0, 0, 31, 0, }, /* 720 */
- { 88, 7, 12, 0, 0, 88, 0, }, /* 721 */
- { 88, 15, 12, 0, 0, 88, 0, }, /* 722 */
- { 88, 21, 12, 0, 0, 88, 0, }, /* 723 */
- { 117, 7, 12, 0, 0, 117, 0, }, /* 724 */
- { 117, 15, 12, 0, 0, 117, 0, }, /* 725 */
- { 112, 7, 12, 0, 0, 112, 0, }, /* 726 */
- { 112, 26, 12, 0, 0, 112, 0, }, /* 727 */
- { 112, 12, 3, 0, 0, 112, 0, }, /* 728 */
- { 112, 15, 12, 0, 0, 112, 0, }, /* 729 */
- { 112, 21, 12, 0, 0, 112, 0, }, /* 730 */
- { 78, 7, 12, 0, 0, 78, 0, }, /* 731 */
- { 78, 21, 12, 0, 0, 78, 0, }, /* 732 */
- { 83, 7, 12, 0, 0, 83, 0, }, /* 733 */
- { 83, 15, 12, 0, 0, 83, 0, }, /* 734 */
- { 82, 7, 12, 0, 0, 82, 0, }, /* 735 */
- { 82, 15, 12, 0, 0, 82, 0, }, /* 736 */
- { 121, 7, 12, 0, 0, 121, 0, }, /* 737 */
- { 121, 21, 12, 0, 0, 121, 0, }, /* 738 */
- { 121, 15, 12, 0, 0, 121, 0, }, /* 739 */
- { 89, 7, 12, 0, 0, 89, 0, }, /* 740 */
- { 130, 9, 12, 0, 64, 130, 0, }, /* 741 */
- { 130, 5, 12, 0, -64, 130, 0, }, /* 742 */
- { 130, 15, 12, 0, 0, 130, 0, }, /* 743 */
- { 144, 7, 12, 0, 0, 144, 0, }, /* 744 */
- { 144, 12, 3, 0, 0, 144, 0, }, /* 745 */
- { 144, 13, 12, 0, 0, 144, 0, }, /* 746 */
- { 1, 15, 12, 0, 0, 1, 0, }, /* 747 */
- { 147, 7, 12, 0, 0, 147, 0, }, /* 748 */
- { 147, 15, 12, 0, 0, 147, 0, }, /* 749 */
- { 148, 7, 12, 0, 0, 148, 0, }, /* 750 */
- { 148, 12, 3, 0, 0, 148, 0, }, /* 751 */
- { 148, 15, 12, 0, 0, 148, 0, }, /* 752 */
- { 148, 21, 12, 0, 0, 148, 0, }, /* 753 */
- { 94, 10, 5, 0, 0, 94, 0, }, /* 754 */
- { 94, 12, 3, 0, 0, 94, 0, }, /* 755 */
- { 94, 7, 12, 0, 0, 94, 0, }, /* 756 */
- { 94, 21, 12, 0, 0, 94, 0, }, /* 757 */
- { 94, 15, 12, 0, 0, 94, 0, }, /* 758 */
- { 94, 13, 12, 0, 0, 94, 0, }, /* 759 */
- { 85, 12, 3, 0, 0, 85, 0, }, /* 760 */
- { 85, 10, 5, 0, 0, 85, 0, }, /* 761 */
- { 85, 7, 12, 0, 0, 85, 0, }, /* 762 */
- { 85, 21, 12, 0, 0, 85, 0, }, /* 763 */
- { 85, 1, 4, 0, 0, 85, 0, }, /* 764 */
- { 101, 7, 12, 0, 0, 101, 0, }, /* 765 */
- { 101, 13, 12, 0, 0, 101, 0, }, /* 766 */
- { 96, 12, 3, 0, 0, 96, 0, }, /* 767 */
- { 96, 7, 12, 0, 0, 96, 0, }, /* 768 */
- { 96, 10, 5, 0, 0, 96, 0, }, /* 769 */
- { 96, 13, 12, 0, 0, 96, 0, }, /* 770 */
- { 96, 21, 12, 0, 0, 96, 0, }, /* 771 */
- { 111, 7, 12, 0, 0, 111, 0, }, /* 772 */
- { 111, 12, 3, 0, 0, 111, 0, }, /* 773 */
- { 111, 21, 12, 0, 0, 111, 0, }, /* 774 */
- { 100, 12, 3, 0, 0, 100, 0, }, /* 775 */
- { 100, 10, 5, 0, 0, 100, 0, }, /* 776 */
- { 100, 7, 12, 0, 0, 100, 0, }, /* 777 */
- { 100, 7, 4, 0, 0, 100, 0, }, /* 778 */
- { 100, 21, 12, 0, 0, 100, 0, }, /* 779 */
- { 100, 13, 12, 0, 0, 100, 0, }, /* 780 */
- { 48, 15, 12, 0, 0, 48, 0, }, /* 781 */
- { 108, 7, 12, 0, 0, 108, 0, }, /* 782 */
- { 108, 10, 5, 0, 0, 108, 0, }, /* 783 */
- { 108, 12, 3, 0, 0, 108, 0, }, /* 784 */
- { 108, 21, 12, 0, 0, 108, 0, }, /* 785 */
- { 129, 7, 12, 0, 0, 129, 0, }, /* 786 */
- { 129, 21, 12, 0, 0, 129, 0, }, /* 787 */
- { 109, 7, 12, 0, 0, 109, 0, }, /* 788 */
- { 109, 12, 3, 0, 0, 109, 0, }, /* 789 */
- { 109, 10, 5, 0, 0, 109, 0, }, /* 790 */
- { 109, 13, 12, 0, 0, 109, 0, }, /* 791 */
- { 107, 12, 3, 0, 0, 107, 0, }, /* 792 */
- { 107, 12, 3, 0, 0, -49, 0, }, /* 793 */
- { 107, 10, 5, 0, 0, 107, 0, }, /* 794 */
- { 107, 10, 5, 0, 0, -49, 0, }, /* 795 */
- { 107, 7, 12, 0, 0, 107, 0, }, /* 796 */
- { 28, 12, 3, 0, 0, -49, 0, }, /* 797 */
- { 107, 10, 3, 0, 0, 107, 0, }, /* 798 */
- { 135, 7, 12, 0, 0, 135, 0, }, /* 799 */
- { 135, 10, 5, 0, 0, 135, 0, }, /* 800 */
- { 135, 12, 3, 0, 0, 135, 0, }, /* 801 */
- { 135, 21, 12, 0, 0, 135, 0, }, /* 802 */
- { 135, 13, 12, 0, 0, 135, 0, }, /* 803 */
- { 124, 7, 12, 0, 0, 124, 0, }, /* 804 */
- { 124, 10, 3, 0, 0, 124, 0, }, /* 805 */
- { 124, 10, 5, 0, 0, 124, 0, }, /* 806 */
- { 124, 12, 3, 0, 0, 124, 0, }, /* 807 */
- { 124, 21, 12, 0, 0, 124, 0, }, /* 808 */
- { 124, 13, 12, 0, 0, 124, 0, }, /* 809 */
- { 123, 7, 12, 0, 0, 123, 0, }, /* 810 */
- { 123, 10, 3, 0, 0, 123, 0, }, /* 811 */
- { 123, 10, 5, 0, 0, 123, 0, }, /* 812 */
- { 123, 12, 3, 0, 0, 123, 0, }, /* 813 */
- { 123, 21, 12, 0, 0, 123, 0, }, /* 814 */
- { 114, 7, 12, 0, 0, 114, 0, }, /* 815 */
- { 114, 10, 5, 0, 0, 114, 0, }, /* 816 */
- { 114, 12, 3, 0, 0, 114, 0, }, /* 817 */
- { 114, 21, 12, 0, 0, 114, 0, }, /* 818 */
- { 114, 13, 12, 0, 0, 114, 0, }, /* 819 */
- { 102, 7, 12, 0, 0, 102, 0, }, /* 820 */
- { 102, 12, 3, 0, 0, 102, 0, }, /* 821 */
- { 102, 10, 5, 0, 0, 102, 0, }, /* 822 */
- { 102, 13, 12, 0, 0, 102, 0, }, /* 823 */
- { 126, 7, 12, 0, 0, 126, 0, }, /* 824 */
- { 126, 12, 3, 0, 0, 126, 0, }, /* 825 */
- { 126, 10, 5, 0, 0, 126, 0, }, /* 826 */
- { 126, 13, 12, 0, 0, 126, 0, }, /* 827 */
- { 126, 15, 12, 0, 0, 126, 0, }, /* 828 */
- { 126, 21, 12, 0, 0, 126, 0, }, /* 829 */
- { 126, 26, 12, 0, 0, 126, 0, }, /* 830 */
- { 142, 7, 12, 0, 0, 142, 0, }, /* 831 */
- { 142, 10, 5, 0, 0, 142, 0, }, /* 832 */
- { 142, 12, 3, 0, 0, 142, 0, }, /* 833 */
- { 142, 21, 12, 0, 0, 142, 0, }, /* 834 */
- { 125, 9, 12, 0, 32, 125, 0, }, /* 835 */
- { 125, 5, 12, 0, -32, 125, 0, }, /* 836 */
- { 125, 13, 12, 0, 0, 125, 0, }, /* 837 */
- { 125, 15, 12, 0, 0, 125, 0, }, /* 838 */
- { 125, 7, 12, 0, 0, 125, 0, }, /* 839 */
- { 141, 7, 12, 0, 0, 141, 0, }, /* 840 */
- { 141, 12, 3, 0, 0, 141, 0, }, /* 841 */
- { 141, 10, 5, 0, 0, 141, 0, }, /* 842 */
- { 141, 7, 4, 0, 0, 141, 0, }, /* 843 */
- { 141, 21, 12, 0, 0, 141, 0, }, /* 844 */
- { 140, 7, 12, 0, 0, 140, 0, }, /* 845 */
- { 140, 12, 3, 0, 0, 140, 0, }, /* 846 */
- { 140, 10, 5, 0, 0, 140, 0, }, /* 847 */
- { 140, 7, 4, 0, 0, 140, 0, }, /* 848 */
- { 140, 21, 12, 0, 0, 140, 0, }, /* 849 */
- { 122, 7, 12, 0, 0, 122, 0, }, /* 850 */
- { 133, 7, 12, 0, 0, 133, 0, }, /* 851 */
- { 133, 10, 5, 0, 0, 133, 0, }, /* 852 */
- { 133, 12, 3, 0, 0, 133, 0, }, /* 853 */
- { 133, 21, 12, 0, 0, 133, 0, }, /* 854 */
- { 133, 13, 12, 0, 0, 133, 0, }, /* 855 */
- { 133, 15, 12, 0, 0, 133, 0, }, /* 856 */
- { 134, 21, 12, 0, 0, 134, 0, }, /* 857 */
- { 134, 7, 12, 0, 0, 134, 0, }, /* 858 */
- { 134, 12, 3, 0, 0, 134, 0, }, /* 859 */
- { 134, 10, 5, 0, 0, 134, 0, }, /* 860 */
- { 138, 7, 12, 0, 0, 138, 0, }, /* 861 */
- { 138, 12, 3, 0, 0, 138, 0, }, /* 862 */
- { 138, 7, 4, 0, 0, 138, 0, }, /* 863 */
- { 138, 13, 12, 0, 0, 138, 0, }, /* 864 */
- { 143, 7, 12, 0, 0, 143, 0, }, /* 865 */
- { 143, 10, 5, 0, 0, 143, 0, }, /* 866 */
- { 143, 12, 3, 0, 0, 143, 0, }, /* 867 */
- { 143, 13, 12, 0, 0, 143, 0, }, /* 868 */
- { 145, 7, 12, 0, 0, 145, 0, }, /* 869 */
- { 145, 12, 3, 0, 0, 145, 0, }, /* 870 */
- { 145, 10, 5, 0, 0, 145, 0, }, /* 871 */
- { 145, 21, 12, 0, 0, 145, 0, }, /* 872 */
- { 63, 7, 12, 0, 0, 63, 0, }, /* 873 */
- { 63, 14, 12, 0, 0, 63, 0, }, /* 874 */
- { 63, 21, 12, 0, 0, 63, 0, }, /* 875 */
- { 80, 7, 12, 0, 0, 80, 0, }, /* 876 */
- { 127, 7, 12, 0, 0, 127, 0, }, /* 877 */
- { 115, 7, 12, 0, 0, 115, 0, }, /* 878 */
- { 115, 13, 12, 0, 0, 115, 0, }, /* 879 */
- { 115, 21, 12, 0, 0, 115, 0, }, /* 880 */
- { 103, 7, 12, 0, 0, 103, 0, }, /* 881 */
- { 103, 12, 3, 0, 0, 103, 0, }, /* 882 */
- { 103, 21, 12, 0, 0, 103, 0, }, /* 883 */
- { 119, 7, 12, 0, 0, 119, 0, }, /* 884 */
- { 119, 12, 3, 0, 0, 119, 0, }, /* 885 */
- { 119, 21, 12, 0, 0, 119, 0, }, /* 886 */
- { 119, 26, 12, 0, 0, 119, 0, }, /* 887 */
- { 119, 6, 12, 0, 0, 119, 0, }, /* 888 */
- { 119, 13, 12, 0, 0, 119, 0, }, /* 889 */
- { 119, 15, 12, 0, 0, 119, 0, }, /* 890 */
- { 146, 9, 12, 0, 32, 146, 0, }, /* 891 */
- { 146, 5, 12, 0, -32, 146, 0, }, /* 892 */
- { 146, 15, 12, 0, 0, 146, 0, }, /* 893 */
- { 146, 21, 12, 0, 0, 146, 0, }, /* 894 */
- { 99, 7, 12, 0, 0, 99, 0, }, /* 895 */
- { 99, 10, 5, 0, 0, 99, 0, }, /* 896 */
- { 99, 12, 3, 0, 0, 99, 0, }, /* 897 */
- { 99, 6, 12, 0, 0, 99, 0, }, /* 898 */
- { 137, 6, 12, 0, 0, 137, 0, }, /* 899 */
- { 139, 6, 12, 0, 0, 139, 0, }, /* 900 */
- { 137, 7, 12, 0, 0, 137, 0, }, /* 901 */
- { 139, 7, 12, 0, 0, 139, 0, }, /* 902 */
- { 105, 7, 12, 0, 0, 105, 0, }, /* 903 */
- { 105, 26, 12, 0, 0, 105, 0, }, /* 904 */
- { 105, 12, 3, 0, 0, 105, 0, }, /* 905 */
- { 105, 21, 12, 0, 0, 105, 0, }, /* 906 */
- { 10, 1, 2, 0, 0, 105, 0, }, /* 907 */
- { 10, 10, 3, 0, 0, 10, 0, }, /* 908 */
- { 10, 10, 5, 0, 0, 10, 0, }, /* 909 */
- { 20, 12, 3, 0, 0, 20, 0, }, /* 910 */
- { 131, 26, 12, 0, 0, 131, 0, }, /* 911 */
- { 131, 12, 3, 0, 0, 131, 0, }, /* 912 */
- { 131, 21, 12, 0, 0, 131, 0, }, /* 913 */
- { 18, 12, 3, 0, 0, 18, 0, }, /* 914 */
- { 113, 7, 12, 0, 0, 113, 0, }, /* 915 */
- { 113, 15, 12, 0, 0, 113, 0, }, /* 916 */
- { 113, 12, 3, 0, 0, 113, 0, }, /* 917 */
- { 132, 9, 12, 0, 34, 132, 0, }, /* 918 */
- { 132, 5, 12, 0, -34, 132, 0, }, /* 919 */
- { 132, 12, 3, 0, 0, 132, 0, }, /* 920 */
- { 132, 13, 12, 0, 0, 132, 0, }, /* 921 */
- { 132, 21, 12, 0, 0, 132, 0, }, /* 922 */
- { 0, 2, 14, 0, 0, 0, 0, }, /* 923 */
- { 10, 26, 11, 0, 0, 10, 0, }, /* 924 */
- { 27, 26, 12, 0, 0, 27, 0, }, /* 925 */
- { 10, 24, 3, 0, 0, 10, 0, }, /* 926 */
- { 10, 1, 3, 0, 0, 10, 0, }, /* 927 */
+const ucd_record PRIV(ucd_records)[] = { /* 6832 bytes, record size 8 */
+ { 9, 0, 2, 0, 0, }, /* 0 */
+ { 9, 0, 1, 0, 0, }, /* 1 */
+ { 9, 0, 0, 0, 0, }, /* 2 */
+ { 9, 29, 12, 0, 0, }, /* 3 */
+ { 9, 21, 12, 0, 0, }, /* 4 */
+ { 9, 23, 12, 0, 0, }, /* 5 */
+ { 9, 22, 12, 0, 0, }, /* 6 */
+ { 9, 18, 12, 0, 0, }, /* 7 */
+ { 9, 25, 12, 0, 0, }, /* 8 */
+ { 9, 17, 12, 0, 0, }, /* 9 */
+ { 9, 13, 12, 0, 0, }, /* 10 */
+ { 33, 9, 12, 0, 32, }, /* 11 */
+ { 33, 9, 12, 100, 32, }, /* 12 */
+ { 33, 9, 12, 1, 32, }, /* 13 */
+ { 9, 24, 12, 0, 0, }, /* 14 */
+ { 9, 16, 12, 0, 0, }, /* 15 */
+ { 33, 5, 12, 0, -32, }, /* 16 */
+ { 33, 5, 12, 100, -32, }, /* 17 */
+ { 33, 5, 12, 1, -32, }, /* 18 */
+ { 9, 26, 12, 0, 0, }, /* 19 */
+ { 9, 26, 14, 0, 0, }, /* 20 */
+ { 33, 7, 12, 0, 0, }, /* 21 */
+ { 9, 20, 12, 0, 0, }, /* 22 */
+ { 9, 1, 2, 0, 0, }, /* 23 */
+ { 9, 15, 12, 0, 0, }, /* 24 */
+ { 9, 5, 12, 26, 775, }, /* 25 */
+ { 9, 19, 12, 0, 0, }, /* 26 */
+ { 33, 9, 12, 104, 32, }, /* 27 */
+ { 33, 5, 12, 0, 7615, }, /* 28 */
+ { 33, 5, 12, 104, -32, }, /* 29 */
+ { 33, 5, 12, 0, 121, }, /* 30 */
+ { 33, 9, 12, 0, 1, }, /* 31 */
+ { 33, 5, 12, 0, -1, }, /* 32 */
+ { 33, 9, 12, 0, 0, }, /* 33 */
+ { 33, 5, 12, 0, 0, }, /* 34 */
+ { 33, 9, 12, 0, -121, }, /* 35 */
+ { 33, 5, 12, 1, -268, }, /* 36 */
+ { 33, 5, 12, 0, 195, }, /* 37 */
+ { 33, 9, 12, 0, 210, }, /* 38 */
+ { 33, 9, 12, 0, 206, }, /* 39 */
+ { 33, 9, 12, 0, 205, }, /* 40 */
+ { 33, 9, 12, 0, 79, }, /* 41 */
+ { 33, 9, 12, 0, 202, }, /* 42 */
+ { 33, 9, 12, 0, 203, }, /* 43 */
+ { 33, 9, 12, 0, 207, }, /* 44 */
+ { 33, 5, 12, 0, 97, }, /* 45 */
+ { 33, 9, 12, 0, 211, }, /* 46 */
+ { 33, 9, 12, 0, 209, }, /* 47 */
+ { 33, 5, 12, 0, 163, }, /* 48 */
+ { 33, 9, 12, 0, 213, }, /* 49 */
+ { 33, 5, 12, 0, 130, }, /* 50 */
+ { 33, 9, 12, 0, 214, }, /* 51 */
+ { 33, 9, 12, 0, 218, }, /* 52 */
+ { 33, 9, 12, 0, 217, }, /* 53 */
+ { 33, 9, 12, 0, 219, }, /* 54 */
+ { 33, 5, 12, 0, 56, }, /* 55 */
+ { 33, 9, 12, 5, 2, }, /* 56 */
+ { 33, 8, 12, 5, 1, }, /* 57 */
+ { 33, 5, 12, 5, -2, }, /* 58 */
+ { 33, 9, 12, 9, 2, }, /* 59 */
+ { 33, 8, 12, 9, 1, }, /* 60 */
+ { 33, 5, 12, 9, -2, }, /* 61 */
+ { 33, 9, 12, 13, 2, }, /* 62 */
+ { 33, 8, 12, 13, 1, }, /* 63 */
+ { 33, 5, 12, 13, -2, }, /* 64 */
+ { 33, 5, 12, 0, -79, }, /* 65 */
+ { 33, 9, 12, 17, 2, }, /* 66 */
+ { 33, 8, 12, 17, 1, }, /* 67 */
+ { 33, 5, 12, 17, -2, }, /* 68 */
+ { 33, 9, 12, 0, -97, }, /* 69 */
+ { 33, 9, 12, 0, -56, }, /* 70 */
+ { 33, 9, 12, 0, -130, }, /* 71 */
+ { 33, 9, 12, 0, 10795, }, /* 72 */
+ { 33, 9, 12, 0, -163, }, /* 73 */
+ { 33, 9, 12, 0, 10792, }, /* 74 */
+ { 33, 5, 12, 0, 10815, }, /* 75 */
+ { 33, 9, 12, 0, -195, }, /* 76 */
+ { 33, 9, 12, 0, 69, }, /* 77 */
+ { 33, 9, 12, 0, 71, }, /* 78 */
+ { 33, 5, 12, 0, 10783, }, /* 79 */
+ { 33, 5, 12, 0, 10780, }, /* 80 */
+ { 33, 5, 12, 0, 10782, }, /* 81 */
+ { 33, 5, 12, 0, -210, }, /* 82 */
+ { 33, 5, 12, 0, -206, }, /* 83 */
+ { 33, 5, 12, 0, -205, }, /* 84 */
+ { 33, 5, 12, 0, -202, }, /* 85 */
+ { 33, 5, 12, 0, -203, }, /* 86 */
+ { 33, 5, 12, 0, 42319, }, /* 87 */
+ { 33, 5, 12, 0, 42315, }, /* 88 */
+ { 33, 5, 12, 0, -207, }, /* 89 */
+ { 33, 5, 12, 0, 42280, }, /* 90 */
+ { 33, 5, 12, 0, 42308, }, /* 91 */
+ { 33, 5, 12, 0, -209, }, /* 92 */
+ { 33, 5, 12, 0, -211, }, /* 93 */
+ { 33, 5, 12, 0, 10743, }, /* 94 */
+ { 33, 5, 12, 0, 42305, }, /* 95 */
+ { 33, 5, 12, 0, 10749, }, /* 96 */
+ { 33, 5, 12, 0, -213, }, /* 97 */
+ { 33, 5, 12, 0, -214, }, /* 98 */
+ { 33, 5, 12, 0, 10727, }, /* 99 */
+ { 33, 5, 12, 0, -218, }, /* 100 */
+ { 33, 5, 12, 0, 42282, }, /* 101 */
+ { 33, 5, 12, 0, -69, }, /* 102 */
+ { 33, 5, 12, 0, -217, }, /* 103 */
+ { 33, 5, 12, 0, -71, }, /* 104 */
+ { 33, 5, 12, 0, -219, }, /* 105 */
+ { 33, 5, 12, 0, 42261, }, /* 106 */
+ { 33, 5, 12, 0, 42258, }, /* 107 */
+ { 33, 6, 12, 0, 0, }, /* 108 */
+ { 9, 6, 12, 0, 0, }, /* 109 */
+ { 3, 24, 12, 0, 0, }, /* 110 */
+ { 27, 12, 3, 0, 0, }, /* 111 */
+ { 27, 12, 3, 21, 116, }, /* 112 */
+ { 19, 9, 12, 0, 1, }, /* 113 */
+ { 19, 5, 12, 0, -1, }, /* 114 */
+ { 19, 24, 12, 0, 0, }, /* 115 */
+ { 9, 2, 12, 0, 0, }, /* 116 */
+ { 19, 6, 12, 0, 0, }, /* 117 */
+ { 19, 5, 12, 0, 130, }, /* 118 */
+ { 19, 9, 12, 0, 116, }, /* 119 */
+ { 19, 9, 12, 0, 38, }, /* 120 */
+ { 19, 9, 12, 0, 37, }, /* 121 */
+ { 19, 9, 12, 0, 64, }, /* 122 */
+ { 19, 9, 12, 0, 63, }, /* 123 */
+ { 19, 5, 12, 0, 0, }, /* 124 */
+ { 19, 9, 12, 0, 32, }, /* 125 */
+ { 19, 9, 12, 34, 32, }, /* 126 */
+ { 19, 9, 12, 59, 32, }, /* 127 */
+ { 19, 9, 12, 38, 32, }, /* 128 */
+ { 19, 9, 12, 21, 32, }, /* 129 */
+ { 19, 9, 12, 51, 32, }, /* 130 */
+ { 19, 9, 12, 26, 32, }, /* 131 */
+ { 19, 9, 12, 47, 32, }, /* 132 */
+ { 19, 9, 12, 55, 32, }, /* 133 */
+ { 19, 9, 12, 30, 32, }, /* 134 */
+ { 19, 9, 12, 43, 32, }, /* 135 */
+ { 19, 9, 12, 96, 32, }, /* 136 */
+ { 19, 5, 12, 0, -38, }, /* 137 */
+ { 19, 5, 12, 0, -37, }, /* 138 */
+ { 19, 5, 12, 0, -32, }, /* 139 */
+ { 19, 5, 12, 34, -32, }, /* 140 */
+ { 19, 5, 12, 59, -32, }, /* 141 */
+ { 19, 5, 12, 38, -32, }, /* 142 */
+ { 19, 5, 12, 21, -116, }, /* 143 */
+ { 19, 5, 12, 51, -32, }, /* 144 */
+ { 19, 5, 12, 26, -775, }, /* 145 */
+ { 19, 5, 12, 47, -32, }, /* 146 */
+ { 19, 5, 12, 55, -32, }, /* 147 */
+ { 19, 5, 12, 30, 1, }, /* 148 */
+ { 19, 5, 12, 30, -32, }, /* 149 */
+ { 19, 5, 12, 43, -32, }, /* 150 */
+ { 19, 5, 12, 96, -32, }, /* 151 */
+ { 19, 5, 12, 0, -64, }, /* 152 */
+ { 19, 5, 12, 0, -63, }, /* 153 */
+ { 19, 9, 12, 0, 8, }, /* 154 */
+ { 19, 5, 12, 34, -30, }, /* 155 */
+ { 19, 5, 12, 38, -25, }, /* 156 */
+ { 19, 9, 12, 0, 0, }, /* 157 */
+ { 19, 5, 12, 43, -15, }, /* 158 */
+ { 19, 5, 12, 47, -22, }, /* 159 */
+ { 19, 5, 12, 0, -8, }, /* 160 */
+ { 10, 9, 12, 0, 1, }, /* 161 */
+ { 10, 5, 12, 0, -1, }, /* 162 */
+ { 19, 5, 12, 51, -54, }, /* 163 */
+ { 19, 5, 12, 55, -48, }, /* 164 */
+ { 19, 5, 12, 0, 7, }, /* 165 */
+ { 19, 5, 12, 0, -116, }, /* 166 */
+ { 19, 9, 12, 38, -60, }, /* 167 */
+ { 19, 5, 12, 59, -64, }, /* 168 */
+ { 19, 25, 12, 0, 0, }, /* 169 */
+ { 19, 9, 12, 0, -7, }, /* 170 */
+ { 19, 9, 12, 0, -130, }, /* 171 */
+ { 12, 9, 12, 0, 80, }, /* 172 */
+ { 12, 9, 12, 0, 32, }, /* 173 */
+ { 12, 9, 12, 63, 32, }, /* 174 */
+ { 12, 9, 12, 67, 32, }, /* 175 */
+ { 12, 9, 12, 71, 32, }, /* 176 */
+ { 12, 9, 12, 75, 32, }, /* 177 */
+ { 12, 9, 12, 79, 32, }, /* 178 */
+ { 12, 9, 12, 84, 32, }, /* 179 */
+ { 12, 5, 12, 0, -32, }, /* 180 */
+ { 12, 5, 12, 63, -32, }, /* 181 */
+ { 12, 5, 12, 67, -32, }, /* 182 */
+ { 12, 5, 12, 71, -32, }, /* 183 */
+ { 12, 5, 12, 75, -32, }, /* 184 */
+ { 12, 5, 12, 79, -32, }, /* 185 */
+ { 12, 5, 12, 84, -32, }, /* 186 */
+ { 12, 5, 12, 0, -80, }, /* 187 */
+ { 12, 9, 12, 0, 1, }, /* 188 */
+ { 12, 5, 12, 0, -1, }, /* 189 */
+ { 12, 9, 12, 88, 1, }, /* 190 */
+ { 12, 5, 12, 88, -1, }, /* 191 */
+ { 12, 26, 12, 0, 0, }, /* 192 */
+ { 12, 12, 3, 0, 0, }, /* 193 */
+ { 12, 11, 3, 0, 0, }, /* 194 */
+ { 12, 9, 12, 0, 15, }, /* 195 */
+ { 12, 5, 12, 0, -15, }, /* 196 */
+ { 1, 9, 12, 0, 48, }, /* 197 */
+ { 1, 6, 12, 0, 0, }, /* 198 */
+ { 1, 21, 12, 0, 0, }, /* 199 */
+ { 1, 5, 12, 0, 0, }, /* 200 */
+ { 1, 5, 12, 0, -48, }, /* 201 */
+ { 1, 17, 12, 0, 0, }, /* 202 */
+ { 1, 26, 12, 0, 0, }, /* 203 */
+ { 1, 23, 12, 0, 0, }, /* 204 */
+ { 25, 12, 3, 0, 0, }, /* 205 */
+ { 25, 17, 12, 0, 0, }, /* 206 */
+ { 25, 21, 12, 0, 0, }, /* 207 */
+ { 25, 7, 12, 0, 0, }, /* 208 */
+ { 0, 1, 4, 0, 0, }, /* 209 */
+ { 9, 1, 4, 0, 0, }, /* 210 */
+ { 0, 25, 12, 0, 0, }, /* 211 */
+ { 0, 21, 12, 0, 0, }, /* 212 */
+ { 0, 23, 12, 0, 0, }, /* 213 */
+ { 0, 26, 12, 0, 0, }, /* 214 */
+ { 0, 12, 3, 0, 0, }, /* 215 */
+ { 0, 1, 2, 0, 0, }, /* 216 */
+ { 0, 7, 12, 0, 0, }, /* 217 */
+ { 0, 13, 12, 0, 0, }, /* 218 */
+ { 0, 6, 12, 0, 0, }, /* 219 */
+ { 49, 21, 12, 0, 0, }, /* 220 */
+ { 49, 1, 4, 0, 0, }, /* 221 */
+ { 49, 7, 12, 0, 0, }, /* 222 */
+ { 49, 12, 3, 0, 0, }, /* 223 */
+ { 55, 7, 12, 0, 0, }, /* 224 */
+ { 55, 12, 3, 0, 0, }, /* 225 */
+ { 63, 13, 12, 0, 0, }, /* 226 */
+ { 63, 7, 12, 0, 0, }, /* 227 */
+ { 63, 12, 3, 0, 0, }, /* 228 */
+ { 63, 6, 12, 0, 0, }, /* 229 */
+ { 63, 26, 12, 0, 0, }, /* 230 */
+ { 63, 21, 12, 0, 0, }, /* 231 */
+ { 63, 23, 12, 0, 0, }, /* 232 */
+ { 89, 7, 12, 0, 0, }, /* 233 */
+ { 89, 12, 3, 0, 0, }, /* 234 */
+ { 89, 6, 12, 0, 0, }, /* 235 */
+ { 89, 21, 12, 0, 0, }, /* 236 */
+ { 94, 7, 12, 0, 0, }, /* 237 */
+ { 94, 12, 3, 0, 0, }, /* 238 */
+ { 94, 21, 12, 0, 0, }, /* 239 */
+ { 14, 12, 3, 0, 0, }, /* 240 */
+ { 14, 10, 5, 0, 0, }, /* 241 */
+ { 14, 7, 12, 0, 0, }, /* 242 */
+ { 14, 13, 12, 0, 0, }, /* 243 */
+ { 14, 21, 12, 0, 0, }, /* 244 */
+ { 14, 6, 12, 0, 0, }, /* 245 */
+ { 2, 7, 12, 0, 0, }, /* 246 */
+ { 2, 12, 3, 0, 0, }, /* 247 */
+ { 2, 10, 5, 0, 0, }, /* 248 */
+ { 2, 10, 3, 0, 0, }, /* 249 */
+ { 2, 13, 12, 0, 0, }, /* 250 */
+ { 2, 23, 12, 0, 0, }, /* 251 */
+ { 2, 15, 12, 0, 0, }, /* 252 */
+ { 2, 26, 12, 0, 0, }, /* 253 */
+ { 2, 21, 12, 0, 0, }, /* 254 */
+ { 21, 12, 3, 0, 0, }, /* 255 */
+ { 21, 10, 5, 0, 0, }, /* 256 */
+ { 21, 7, 12, 0, 0, }, /* 257 */
+ { 21, 13, 12, 0, 0, }, /* 258 */
+ { 21, 21, 12, 0, 0, }, /* 259 */
+ { 20, 12, 3, 0, 0, }, /* 260 */
+ { 20, 10, 5, 0, 0, }, /* 261 */
+ { 20, 7, 12, 0, 0, }, /* 262 */
+ { 20, 13, 12, 0, 0, }, /* 263 */
+ { 20, 21, 12, 0, 0, }, /* 264 */
+ { 20, 23, 12, 0, 0, }, /* 265 */
+ { 43, 12, 3, 0, 0, }, /* 266 */
+ { 43, 10, 5, 0, 0, }, /* 267 */
+ { 43, 7, 12, 0, 0, }, /* 268 */
+ { 43, 10, 3, 0, 0, }, /* 269 */
+ { 43, 13, 12, 0, 0, }, /* 270 */
+ { 43, 26, 12, 0, 0, }, /* 271 */
+ { 43, 15, 12, 0, 0, }, /* 272 */
+ { 53, 12, 3, 0, 0, }, /* 273 */
+ { 53, 7, 12, 0, 0, }, /* 274 */
+ { 53, 10, 3, 0, 0, }, /* 275 */
+ { 53, 10, 5, 0, 0, }, /* 276 */
+ { 53, 13, 12, 0, 0, }, /* 277 */
+ { 53, 15, 12, 0, 0, }, /* 278 */
+ { 53, 26, 12, 0, 0, }, /* 279 */
+ { 53, 23, 12, 0, 0, }, /* 280 */
+ { 54, 12, 3, 0, 0, }, /* 281 */
+ { 54, 10, 5, 0, 0, }, /* 282 */
+ { 54, 7, 12, 0, 0, }, /* 283 */
+ { 54, 13, 12, 0, 0, }, /* 284 */
+ { 54, 15, 12, 0, 0, }, /* 285 */
+ { 54, 26, 12, 0, 0, }, /* 286 */
+ { 28, 7, 12, 0, 0, }, /* 287 */
+ { 28, 12, 3, 0, 0, }, /* 288 */
+ { 28, 10, 5, 0, 0, }, /* 289 */
+ { 28, 21, 12, 0, 0, }, /* 290 */
+ { 28, 10, 3, 0, 0, }, /* 291 */
+ { 28, 13, 12, 0, 0, }, /* 292 */
+ { 36, 12, 3, 0, 0, }, /* 293 */
+ { 36, 10, 5, 0, 0, }, /* 294 */
+ { 36, 7, 12, 0, 0, }, /* 295 */
+ { 36, 10, 3, 0, 0, }, /* 296 */
+ { 36, 7, 4, 0, 0, }, /* 297 */
+ { 36, 26, 12, 0, 0, }, /* 298 */
+ { 36, 15, 12, 0, 0, }, /* 299 */
+ { 36, 13, 12, 0, 0, }, /* 300 */
+ { 47, 10, 5, 0, 0, }, /* 301 */
+ { 47, 7, 12, 0, 0, }, /* 302 */
+ { 47, 12, 3, 0, 0, }, /* 303 */
+ { 47, 10, 3, 0, 0, }, /* 304 */
+ { 47, 13, 12, 0, 0, }, /* 305 */
+ { 47, 21, 12, 0, 0, }, /* 306 */
+ { 56, 7, 12, 0, 0, }, /* 307 */
+ { 56, 12, 3, 0, 0, }, /* 308 */
+ { 56, 7, 5, 0, 0, }, /* 309 */
+ { 56, 6, 12, 0, 0, }, /* 310 */
+ { 56, 21, 12, 0, 0, }, /* 311 */
+ { 56, 13, 12, 0, 0, }, /* 312 */
+ { 32, 7, 12, 0, 0, }, /* 313 */
+ { 32, 12, 3, 0, 0, }, /* 314 */
+ { 32, 7, 5, 0, 0, }, /* 315 */
+ { 32, 6, 12, 0, 0, }, /* 316 */
+ { 32, 13, 12, 0, 0, }, /* 317 */
+ { 57, 7, 12, 0, 0, }, /* 318 */
+ { 57, 26, 12, 0, 0, }, /* 319 */
+ { 57, 21, 12, 0, 0, }, /* 320 */
+ { 57, 12, 3, 0, 0, }, /* 321 */
+ { 57, 13, 12, 0, 0, }, /* 322 */
+ { 57, 15, 12, 0, 0, }, /* 323 */
+ { 57, 22, 12, 0, 0, }, /* 324 */
+ { 57, 18, 12, 0, 0, }, /* 325 */
+ { 57, 10, 5, 0, 0, }, /* 326 */
+ { 38, 7, 12, 0, 0, }, /* 327 */
+ { 38, 10, 12, 0, 0, }, /* 328 */
+ { 38, 12, 3, 0, 0, }, /* 329 */
+ { 38, 10, 5, 0, 0, }, /* 330 */
+ { 38, 13, 12, 0, 0, }, /* 331 */
+ { 38, 21, 12, 0, 0, }, /* 332 */
+ { 38, 26, 12, 0, 0, }, /* 333 */
+ { 16, 9, 12, 0, 7264, }, /* 334 */
+ { 16, 5, 12, 0, 3008, }, /* 335 */
+ { 16, 6, 12, 0, 0, }, /* 336 */
+ { 23, 7, 6, 0, 0, }, /* 337 */
+ { 23, 7, 7, 0, 0, }, /* 338 */
+ { 23, 7, 8, 0, 0, }, /* 339 */
+ { 15, 7, 12, 0, 0, }, /* 340 */
+ { 15, 12, 3, 0, 0, }, /* 341 */
+ { 15, 21, 12, 0, 0, }, /* 342 */
+ { 15, 15, 12, 0, 0, }, /* 343 */
+ { 15, 26, 12, 0, 0, }, /* 344 */
+ { 8, 9, 12, 0, 38864, }, /* 345 */
+ { 8, 9, 12, 0, 8, }, /* 346 */
+ { 8, 5, 12, 0, -8, }, /* 347 */
+ { 7, 17, 12, 0, 0, }, /* 348 */
+ { 7, 7, 12, 0, 0, }, /* 349 */
+ { 7, 21, 12, 0, 0, }, /* 350 */
+ { 40, 29, 12, 0, 0, }, /* 351 */
+ { 40, 7, 12, 0, 0, }, /* 352 */
+ { 40, 22, 12, 0, 0, }, /* 353 */
+ { 40, 18, 12, 0, 0, }, /* 354 */
+ { 45, 7, 12, 0, 0, }, /* 355 */
+ { 45, 14, 12, 0, 0, }, /* 356 */
+ { 50, 7, 12, 0, 0, }, /* 357 */
+ { 50, 12, 3, 0, 0, }, /* 358 */
+ { 24, 7, 12, 0, 0, }, /* 359 */
+ { 24, 12, 3, 0, 0, }, /* 360 */
+ { 6, 7, 12, 0, 0, }, /* 361 */
+ { 6, 12, 3, 0, 0, }, /* 362 */
+ { 51, 7, 12, 0, 0, }, /* 363 */
+ { 51, 12, 3, 0, 0, }, /* 364 */
+ { 31, 7, 12, 0, 0, }, /* 365 */
+ { 31, 12, 3, 0, 0, }, /* 366 */
+ { 31, 10, 5, 0, 0, }, /* 367 */
+ { 31, 21, 12, 0, 0, }, /* 368 */
+ { 31, 6, 12, 0, 0, }, /* 369 */
+ { 31, 23, 12, 0, 0, }, /* 370 */
+ { 31, 13, 12, 0, 0, }, /* 371 */
+ { 31, 15, 12, 0, 0, }, /* 372 */
+ { 37, 21, 12, 0, 0, }, /* 373 */
+ { 37, 17, 12, 0, 0, }, /* 374 */
+ { 37, 12, 3, 0, 0, }, /* 375 */
+ { 37, 1, 2, 0, 0, }, /* 376 */
+ { 37, 13, 12, 0, 0, }, /* 377 */
+ { 37, 7, 12, 0, 0, }, /* 378 */
+ { 37, 6, 12, 0, 0, }, /* 379 */
+ { 34, 7, 12, 0, 0, }, /* 380 */
+ { 34, 12, 3, 0, 0, }, /* 381 */
+ { 34, 10, 5, 0, 0, }, /* 382 */
+ { 34, 26, 12, 0, 0, }, /* 383 */
+ { 34, 21, 12, 0, 0, }, /* 384 */
+ { 34, 13, 12, 0, 0, }, /* 385 */
+ { 52, 7, 12, 0, 0, }, /* 386 */
+ { 39, 7, 12, 0, 0, }, /* 387 */
+ { 39, 13, 12, 0, 0, }, /* 388 */
+ { 39, 15, 12, 0, 0, }, /* 389 */
+ { 39, 26, 12, 0, 0, }, /* 390 */
+ { 31, 26, 12, 0, 0, }, /* 391 */
+ { 5, 7, 12, 0, 0, }, /* 392 */
+ { 5, 12, 3, 0, 0, }, /* 393 */
+ { 5, 10, 5, 0, 0, }, /* 394 */
+ { 5, 21, 12, 0, 0, }, /* 395 */
+ { 90, 7, 12, 0, 0, }, /* 396 */
+ { 90, 10, 5, 0, 0, }, /* 397 */
+ { 90, 12, 3, 0, 0, }, /* 398 */
+ { 90, 10, 12, 0, 0, }, /* 399 */
+ { 90, 13, 12, 0, 0, }, /* 400 */
+ { 90, 21, 12, 0, 0, }, /* 401 */
+ { 90, 6, 12, 0, 0, }, /* 402 */
+ { 27, 11, 3, 0, 0, }, /* 403 */
+ { 61, 12, 3, 0, 0, }, /* 404 */
+ { 61, 10, 5, 0, 0, }, /* 405 */
+ { 61, 7, 12, 0, 0, }, /* 406 */
+ { 61, 13, 12, 0, 0, }, /* 407 */
+ { 61, 21, 12, 0, 0, }, /* 408 */
+ { 61, 26, 12, 0, 0, }, /* 409 */
+ { 75, 12, 3, 0, 0, }, /* 410 */
+ { 75, 10, 5, 0, 0, }, /* 411 */
+ { 75, 7, 12, 0, 0, }, /* 412 */
+ { 75, 13, 12, 0, 0, }, /* 413 */
+ { 92, 7, 12, 0, 0, }, /* 414 */
+ { 92, 12, 3, 0, 0, }, /* 415 */
+ { 92, 10, 5, 0, 0, }, /* 416 */
+ { 92, 21, 12, 0, 0, }, /* 417 */
+ { 69, 7, 12, 0, 0, }, /* 418 */
+ { 69, 10, 5, 0, 0, }, /* 419 */
+ { 69, 12, 3, 0, 0, }, /* 420 */
+ { 69, 21, 12, 0, 0, }, /* 421 */
+ { 69, 13, 12, 0, 0, }, /* 422 */
+ { 72, 13, 12, 0, 0, }, /* 423 */
+ { 72, 7, 12, 0, 0, }, /* 424 */
+ { 72, 6, 12, 0, 0, }, /* 425 */
+ { 72, 21, 12, 0, 0, }, /* 426 */
+ { 12, 5, 12, 63, -6222, }, /* 427 */
+ { 12, 5, 12, 67, -6221, }, /* 428 */
+ { 12, 5, 12, 71, -6212, }, /* 429 */
+ { 12, 5, 12, 75, -6210, }, /* 430 */
+ { 12, 5, 12, 79, -6210, }, /* 431 */
+ { 12, 5, 12, 79, -6211, }, /* 432 */
+ { 12, 5, 12, 84, -6204, }, /* 433 */
+ { 12, 5, 12, 88, -6180, }, /* 434 */
+ { 12, 5, 12, 108, 35267, }, /* 435 */
+ { 16, 9, 12, 0, -3008, }, /* 436 */
+ { 75, 21, 12, 0, 0, }, /* 437 */
+ { 9, 10, 5, 0, 0, }, /* 438 */
+ { 9, 7, 12, 0, 0, }, /* 439 */
+ { 12, 5, 12, 0, 0, }, /* 440 */
+ { 12, 6, 12, 0, 0, }, /* 441 */
+ { 33, 5, 12, 0, 35332, }, /* 442 */
+ { 33, 5, 12, 0, 3814, }, /* 443 */
+ { 33, 9, 12, 92, 1, }, /* 444 */
+ { 33, 5, 12, 92, -1, }, /* 445 */
+ { 33, 5, 12, 92, -58, }, /* 446 */
+ { 33, 9, 12, 0, -7615, }, /* 447 */
+ { 19, 5, 12, 0, 8, }, /* 448 */
+ { 19, 9, 12, 0, -8, }, /* 449 */
+ { 19, 5, 12, 0, 74, }, /* 450 */
+ { 19, 5, 12, 0, 86, }, /* 451 */
+ { 19, 5, 12, 0, 100, }, /* 452 */
+ { 19, 5, 12, 0, 128, }, /* 453 */
+ { 19, 5, 12, 0, 112, }, /* 454 */
+ { 19, 5, 12, 0, 126, }, /* 455 */
+ { 19, 8, 12, 0, -8, }, /* 456 */
+ { 19, 5, 12, 0, 9, }, /* 457 */
+ { 19, 9, 12, 0, -74, }, /* 458 */
+ { 19, 8, 12, 0, -9, }, /* 459 */
+ { 19, 5, 12, 21, -7173, }, /* 460 */
+ { 19, 9, 12, 0, -86, }, /* 461 */
+ { 19, 9, 12, 0, -100, }, /* 462 */
+ { 19, 9, 12, 0, -112, }, /* 463 */
+ { 19, 9, 12, 0, -128, }, /* 464 */
+ { 19, 9, 12, 0, -126, }, /* 465 */
+ { 27, 1, 3, 0, 0, }, /* 466 */
+ { 27, 1, 13, 0, 0, }, /* 467 */
+ { 9, 27, 2, 0, 0, }, /* 468 */
+ { 9, 28, 2, 0, 0, }, /* 469 */
+ { 9, 21, 14, 0, 0, }, /* 470 */
+ { 9, 2, 2, 0, 0, }, /* 471 */
+ { 9, 9, 12, 0, 0, }, /* 472 */
+ { 9, 5, 12, 0, 0, }, /* 473 */
+ { 19, 9, 12, 96, -7517, }, /* 474 */
+ { 33, 9, 12, 100, -8383, }, /* 475 */
+ { 33, 9, 12, 104, -8262, }, /* 476 */
+ { 33, 9, 12, 0, 28, }, /* 477 */
+ { 9, 5, 14, 0, 0, }, /* 478 */
+ { 33, 5, 12, 0, -28, }, /* 479 */
+ { 33, 14, 12, 0, 16, }, /* 480 */
+ { 33, 14, 12, 0, -16, }, /* 481 */
+ { 33, 14, 12, 0, 0, }, /* 482 */
+ { 9, 25, 14, 0, 0, }, /* 483 */
+ { 9, 26, 12, 0, 26, }, /* 484 */
+ { 9, 26, 14, 0, 26, }, /* 485 */
+ { 9, 26, 12, 0, -26, }, /* 486 */
+ { 4, 26, 12, 0, 0, }, /* 487 */
+ { 17, 9, 12, 0, 48, }, /* 488 */
+ { 17, 5, 12, 0, -48, }, /* 489 */
+ { 33, 9, 12, 0, -10743, }, /* 490 */
+ { 33, 9, 12, 0, -3814, }, /* 491 */
+ { 33, 9, 12, 0, -10727, }, /* 492 */
+ { 33, 5, 12, 0, -10795, }, /* 493 */
+ { 33, 5, 12, 0, -10792, }, /* 494 */
+ { 33, 9, 12, 0, -10780, }, /* 495 */
+ { 33, 9, 12, 0, -10749, }, /* 496 */
+ { 33, 9, 12, 0, -10783, }, /* 497 */
+ { 33, 9, 12, 0, -10782, }, /* 498 */
+ { 33, 9, 12, 0, -10815, }, /* 499 */
+ { 10, 5, 12, 0, 0, }, /* 500 */
+ { 10, 26, 12, 0, 0, }, /* 501 */
+ { 10, 12, 3, 0, 0, }, /* 502 */
+ { 10, 21, 12, 0, 0, }, /* 503 */
+ { 10, 15, 12, 0, 0, }, /* 504 */
+ { 16, 5, 12, 0, -7264, }, /* 505 */
+ { 58, 7, 12, 0, 0, }, /* 506 */
+ { 58, 6, 12, 0, 0, }, /* 507 */
+ { 58, 21, 12, 0, 0, }, /* 508 */
+ { 58, 12, 3, 0, 0, }, /* 509 */
+ { 22, 26, 12, 0, 0, }, /* 510 */
+ { 22, 6, 12, 0, 0, }, /* 511 */
+ { 22, 14, 12, 0, 0, }, /* 512 */
+ { 23, 10, 3, 0, 0, }, /* 513 */
+ { 9, 17, 14, 0, 0, }, /* 514 */
+ { 26, 7, 12, 0, 0, }, /* 515 */
+ { 26, 6, 12, 0, 0, }, /* 516 */
+ { 29, 7, 12, 0, 0, }, /* 517 */
+ { 29, 6, 12, 0, 0, }, /* 518 */
+ { 3, 7, 12, 0, 0, }, /* 519 */
+ { 23, 7, 12, 0, 0, }, /* 520 */
+ { 23, 26, 12, 0, 0, }, /* 521 */
+ { 29, 26, 12, 0, 0, }, /* 522 */
+ { 22, 7, 12, 0, 0, }, /* 523 */
+ { 60, 7, 12, 0, 0, }, /* 524 */
+ { 60, 6, 12, 0, 0, }, /* 525 */
+ { 60, 26, 12, 0, 0, }, /* 526 */
+ { 85, 7, 12, 0, 0, }, /* 527 */
+ { 85, 6, 12, 0, 0, }, /* 528 */
+ { 85, 21, 12, 0, 0, }, /* 529 */
+ { 76, 7, 12, 0, 0, }, /* 530 */
+ { 76, 6, 12, 0, 0, }, /* 531 */
+ { 76, 21, 12, 0, 0, }, /* 532 */
+ { 76, 13, 12, 0, 0, }, /* 533 */
+ { 12, 9, 12, 108, 1, }, /* 534 */
+ { 12, 5, 12, 108, -35267, }, /* 535 */
+ { 12, 7, 12, 0, 0, }, /* 536 */
+ { 12, 21, 12, 0, 0, }, /* 537 */
+ { 78, 7, 12, 0, 0, }, /* 538 */
+ { 78, 14, 12, 0, 0, }, /* 539 */
+ { 78, 12, 3, 0, 0, }, /* 540 */
+ { 78, 21, 12, 0, 0, }, /* 541 */
+ { 33, 9, 12, 0, -35332, }, /* 542 */
+ { 33, 9, 12, 0, -42280, }, /* 543 */
+ { 33, 9, 12, 0, -42308, }, /* 544 */
+ { 33, 9, 12, 0, -42319, }, /* 545 */
+ { 33, 9, 12, 0, -42315, }, /* 546 */
+ { 33, 9, 12, 0, -42305, }, /* 547 */
+ { 33, 9, 12, 0, -42258, }, /* 548 */
+ { 33, 9, 12, 0, -42282, }, /* 549 */
+ { 33, 9, 12, 0, -42261, }, /* 550 */
+ { 33, 9, 12, 0, 928, }, /* 551 */
+ { 48, 7, 12, 0, 0, }, /* 552 */
+ { 48, 12, 3, 0, 0, }, /* 553 */
+ { 48, 10, 5, 0, 0, }, /* 554 */
+ { 48, 26, 12, 0, 0, }, /* 555 */
+ { 64, 7, 12, 0, 0, }, /* 556 */
+ { 64, 21, 12, 0, 0, }, /* 557 */
+ { 74, 10, 5, 0, 0, }, /* 558 */
+ { 74, 7, 12, 0, 0, }, /* 559 */
+ { 74, 12, 3, 0, 0, }, /* 560 */
+ { 74, 21, 12, 0, 0, }, /* 561 */
+ { 74, 13, 12, 0, 0, }, /* 562 */
+ { 68, 13, 12, 0, 0, }, /* 563 */
+ { 68, 7, 12, 0, 0, }, /* 564 */
+ { 68, 12, 3, 0, 0, }, /* 565 */
+ { 68, 21, 12, 0, 0, }, /* 566 */
+ { 73, 7, 12, 0, 0, }, /* 567 */
+ { 73, 12, 3, 0, 0, }, /* 568 */
+ { 73, 10, 5, 0, 0, }, /* 569 */
+ { 73, 21, 12, 0, 0, }, /* 570 */
+ { 83, 12, 3, 0, 0, }, /* 571 */
+ { 83, 10, 5, 0, 0, }, /* 572 */
+ { 83, 7, 12, 0, 0, }, /* 573 */
+ { 83, 21, 12, 0, 0, }, /* 574 */
+ { 83, 13, 12, 0, 0, }, /* 575 */
+ { 38, 6, 12, 0, 0, }, /* 576 */
+ { 67, 7, 12, 0, 0, }, /* 577 */
+ { 67, 12, 3, 0, 0, }, /* 578 */
+ { 67, 10, 5, 0, 0, }, /* 579 */
+ { 67, 13, 12, 0, 0, }, /* 580 */
+ { 67, 21, 12, 0, 0, }, /* 581 */
+ { 91, 7, 12, 0, 0, }, /* 582 */
+ { 91, 12, 3, 0, 0, }, /* 583 */
+ { 91, 6, 12, 0, 0, }, /* 584 */
+ { 91, 21, 12, 0, 0, }, /* 585 */
+ { 86, 7, 12, 0, 0, }, /* 586 */
+ { 86, 10, 5, 0, 0, }, /* 587 */
+ { 86, 12, 3, 0, 0, }, /* 588 */
+ { 86, 21, 12, 0, 0, }, /* 589 */
+ { 86, 6, 12, 0, 0, }, /* 590 */
+ { 33, 5, 12, 0, -928, }, /* 591 */
+ { 8, 5, 12, 0, -38864, }, /* 592 */
+ { 86, 13, 12, 0, 0, }, /* 593 */
+ { 23, 7, 9, 0, 0, }, /* 594 */
+ { 23, 7, 10, 0, 0, }, /* 595 */
+ { 9, 4, 2, 0, 0, }, /* 596 */
+ { 9, 3, 12, 0, 0, }, /* 597 */
+ { 25, 25, 12, 0, 0, }, /* 598 */
+ { 0, 24, 12, 0, 0, }, /* 599 */
+ { 9, 6, 3, 0, 0, }, /* 600 */
+ { 35, 7, 12, 0, 0, }, /* 601 */
+ { 19, 14, 12, 0, 0, }, /* 602 */
+ { 19, 15, 12, 0, 0, }, /* 603 */
+ { 19, 26, 12, 0, 0, }, /* 604 */
+ { 70, 7, 12, 0, 0, }, /* 605 */
+ { 66, 7, 12, 0, 0, }, /* 606 */
+ { 41, 7, 12, 0, 0, }, /* 607 */
+ { 41, 15, 12, 0, 0, }, /* 608 */
+ { 18, 7, 12, 0, 0, }, /* 609 */
+ { 18, 14, 12, 0, 0, }, /* 610 */
+ { 117, 7, 12, 0, 0, }, /* 611 */
+ { 117, 12, 3, 0, 0, }, /* 612 */
+ { 59, 7, 12, 0, 0, }, /* 613 */
+ { 59, 21, 12, 0, 0, }, /* 614 */
+ { 42, 7, 12, 0, 0, }, /* 615 */
+ { 42, 21, 12, 0, 0, }, /* 616 */
+ { 42, 14, 12, 0, 0, }, /* 617 */
+ { 13, 9, 12, 0, 40, }, /* 618 */
+ { 13, 5, 12, 0, -40, }, /* 619 */
+ { 46, 7, 12, 0, 0, }, /* 620 */
+ { 44, 7, 12, 0, 0, }, /* 621 */
+ { 44, 13, 12, 0, 0, }, /* 622 */
+ { 135, 9, 12, 0, 40, }, /* 623 */
+ { 135, 5, 12, 0, -40, }, /* 624 */
+ { 105, 7, 12, 0, 0, }, /* 625 */
+ { 103, 7, 12, 0, 0, }, /* 626 */
+ { 103, 21, 12, 0, 0, }, /* 627 */
+ { 109, 7, 12, 0, 0, }, /* 628 */
+ { 11, 7, 12, 0, 0, }, /* 629 */
+ { 80, 7, 12, 0, 0, }, /* 630 */
+ { 80, 21, 12, 0, 0, }, /* 631 */
+ { 80, 15, 12, 0, 0, }, /* 632 */
+ { 119, 7, 12, 0, 0, }, /* 633 */
+ { 119, 26, 12, 0, 0, }, /* 634 */
+ { 119, 15, 12, 0, 0, }, /* 635 */
+ { 115, 7, 12, 0, 0, }, /* 636 */
+ { 115, 15, 12, 0, 0, }, /* 637 */
+ { 127, 7, 12, 0, 0, }, /* 638 */
+ { 127, 15, 12, 0, 0, }, /* 639 */
+ { 65, 7, 12, 0, 0, }, /* 640 */
+ { 65, 15, 12, 0, 0, }, /* 641 */
+ { 65, 21, 12, 0, 0, }, /* 642 */
+ { 71, 7, 12, 0, 0, }, /* 643 */
+ { 71, 21, 12, 0, 0, }, /* 644 */
+ { 97, 7, 12, 0, 0, }, /* 645 */
+ { 96, 7, 12, 0, 0, }, /* 646 */
+ { 96, 15, 12, 0, 0, }, /* 647 */
+ { 30, 7, 12, 0, 0, }, /* 648 */
+ { 30, 12, 3, 0, 0, }, /* 649 */
+ { 30, 15, 12, 0, 0, }, /* 650 */
+ { 30, 21, 12, 0, 0, }, /* 651 */
+ { 87, 7, 12, 0, 0, }, /* 652 */
+ { 87, 15, 12, 0, 0, }, /* 653 */
+ { 87, 21, 12, 0, 0, }, /* 654 */
+ { 116, 7, 12, 0, 0, }, /* 655 */
+ { 116, 15, 12, 0, 0, }, /* 656 */
+ { 111, 7, 12, 0, 0, }, /* 657 */
+ { 111, 26, 12, 0, 0, }, /* 658 */
+ { 111, 12, 3, 0, 0, }, /* 659 */
+ { 111, 15, 12, 0, 0, }, /* 660 */
+ { 111, 21, 12, 0, 0, }, /* 661 */
+ { 77, 7, 12, 0, 0, }, /* 662 */
+ { 77, 21, 12, 0, 0, }, /* 663 */
+ { 82, 7, 12, 0, 0, }, /* 664 */
+ { 82, 15, 12, 0, 0, }, /* 665 */
+ { 81, 7, 12, 0, 0, }, /* 666 */
+ { 81, 15, 12, 0, 0, }, /* 667 */
+ { 120, 7, 12, 0, 0, }, /* 668 */
+ { 120, 21, 12, 0, 0, }, /* 669 */
+ { 120, 15, 12, 0, 0, }, /* 670 */
+ { 88, 7, 12, 0, 0, }, /* 671 */
+ { 129, 9, 12, 0, 64, }, /* 672 */
+ { 129, 5, 12, 0, -64, }, /* 673 */
+ { 129, 15, 12, 0, 0, }, /* 674 */
+ { 143, 7, 12, 0, 0, }, /* 675 */
+ { 143, 12, 3, 0, 0, }, /* 676 */
+ { 143, 13, 12, 0, 0, }, /* 677 */
+ { 0, 15, 12, 0, 0, }, /* 678 */
+ { 146, 7, 12, 0, 0, }, /* 679 */
+ { 146, 15, 12, 0, 0, }, /* 680 */
+ { 147, 7, 12, 0, 0, }, /* 681 */
+ { 147, 12, 3, 0, 0, }, /* 682 */
+ { 147, 15, 12, 0, 0, }, /* 683 */
+ { 147, 21, 12, 0, 0, }, /* 684 */
+ { 93, 10, 5, 0, 0, }, /* 685 */
+ { 93, 12, 3, 0, 0, }, /* 686 */
+ { 93, 7, 12, 0, 0, }, /* 687 */
+ { 93, 21, 12, 0, 0, }, /* 688 */
+ { 93, 15, 12, 0, 0, }, /* 689 */
+ { 93, 13, 12, 0, 0, }, /* 690 */
+ { 84, 12, 3, 0, 0, }, /* 691 */
+ { 84, 10, 5, 0, 0, }, /* 692 */
+ { 84, 7, 12, 0, 0, }, /* 693 */
+ { 84, 21, 12, 0, 0, }, /* 694 */
+ { 84, 1, 4, 0, 0, }, /* 695 */
+ { 100, 7, 12, 0, 0, }, /* 696 */
+ { 100, 13, 12, 0, 0, }, /* 697 */
+ { 95, 12, 3, 0, 0, }, /* 698 */
+ { 95, 7, 12, 0, 0, }, /* 699 */
+ { 95, 10, 5, 0, 0, }, /* 700 */
+ { 95, 13, 12, 0, 0, }, /* 701 */
+ { 95, 21, 12, 0, 0, }, /* 702 */
+ { 110, 7, 12, 0, 0, }, /* 703 */
+ { 110, 12, 3, 0, 0, }, /* 704 */
+ { 110, 21, 12, 0, 0, }, /* 705 */
+ { 99, 12, 3, 0, 0, }, /* 706 */
+ { 99, 10, 5, 0, 0, }, /* 707 */
+ { 99, 7, 12, 0, 0, }, /* 708 */
+ { 99, 7, 4, 0, 0, }, /* 709 */
+ { 99, 21, 12, 0, 0, }, /* 710 */
+ { 99, 13, 12, 0, 0, }, /* 711 */
+ { 47, 15, 12, 0, 0, }, /* 712 */
+ { 107, 7, 12, 0, 0, }, /* 713 */
+ { 107, 10, 5, 0, 0, }, /* 714 */
+ { 107, 12, 3, 0, 0, }, /* 715 */
+ { 107, 21, 12, 0, 0, }, /* 716 */
+ { 128, 7, 12, 0, 0, }, /* 717 */
+ { 128, 21, 12, 0, 0, }, /* 718 */
+ { 108, 7, 12, 0, 0, }, /* 719 */
+ { 108, 12, 3, 0, 0, }, /* 720 */
+ { 108, 10, 5, 0, 0, }, /* 721 */
+ { 108, 13, 12, 0, 0, }, /* 722 */
+ { 106, 12, 3, 0, 0, }, /* 723 */
+ { 106, 10, 5, 0, 0, }, /* 724 */
+ { 106, 7, 12, 0, 0, }, /* 725 */
+ { 106, 10, 3, 0, 0, }, /* 726 */
+ { 134, 7, 12, 0, 0, }, /* 727 */
+ { 134, 10, 5, 0, 0, }, /* 728 */
+ { 134, 12, 3, 0, 0, }, /* 729 */
+ { 134, 21, 12, 0, 0, }, /* 730 */
+ { 134, 13, 12, 0, 0, }, /* 731 */
+ { 123, 7, 12, 0, 0, }, /* 732 */
+ { 123, 10, 3, 0, 0, }, /* 733 */
+ { 123, 10, 5, 0, 0, }, /* 734 */
+ { 123, 12, 3, 0, 0, }, /* 735 */
+ { 123, 21, 12, 0, 0, }, /* 736 */
+ { 123, 13, 12, 0, 0, }, /* 737 */
+ { 122, 7, 12, 0, 0, }, /* 738 */
+ { 122, 10, 3, 0, 0, }, /* 739 */
+ { 122, 10, 5, 0, 0, }, /* 740 */
+ { 122, 12, 3, 0, 0, }, /* 741 */
+ { 122, 21, 12, 0, 0, }, /* 742 */
+ { 113, 7, 12, 0, 0, }, /* 743 */
+ { 113, 10, 5, 0, 0, }, /* 744 */
+ { 113, 12, 3, 0, 0, }, /* 745 */
+ { 113, 21, 12, 0, 0, }, /* 746 */
+ { 113, 13, 12, 0, 0, }, /* 747 */
+ { 101, 7, 12, 0, 0, }, /* 748 */
+ { 101, 12, 3, 0, 0, }, /* 749 */
+ { 101, 10, 5, 0, 0, }, /* 750 */
+ { 101, 13, 12, 0, 0, }, /* 751 */
+ { 125, 7, 12, 0, 0, }, /* 752 */
+ { 125, 12, 3, 0, 0, }, /* 753 */
+ { 125, 10, 5, 0, 0, }, /* 754 */
+ { 125, 13, 12, 0, 0, }, /* 755 */
+ { 125, 15, 12, 0, 0, }, /* 756 */
+ { 125, 21, 12, 0, 0, }, /* 757 */
+ { 125, 26, 12, 0, 0, }, /* 758 */
+ { 141, 7, 12, 0, 0, }, /* 759 */
+ { 141, 10, 5, 0, 0, }, /* 760 */
+ { 141, 12, 3, 0, 0, }, /* 761 */
+ { 141, 21, 12, 0, 0, }, /* 762 */
+ { 124, 9, 12, 0, 32, }, /* 763 */
+ { 124, 5, 12, 0, -32, }, /* 764 */
+ { 124, 13, 12, 0, 0, }, /* 765 */
+ { 124, 15, 12, 0, 0, }, /* 766 */
+ { 124, 7, 12, 0, 0, }, /* 767 */
+ { 140, 7, 12, 0, 0, }, /* 768 */
+ { 140, 12, 3, 0, 0, }, /* 769 */
+ { 140, 10, 5, 0, 0, }, /* 770 */
+ { 140, 7, 4, 0, 0, }, /* 771 */
+ { 140, 21, 12, 0, 0, }, /* 772 */
+ { 139, 7, 12, 0, 0, }, /* 773 */
+ { 139, 12, 3, 0, 0, }, /* 774 */
+ { 139, 10, 5, 0, 0, }, /* 775 */
+ { 139, 7, 4, 0, 0, }, /* 776 */
+ { 139, 21, 12, 0, 0, }, /* 777 */
+ { 121, 7, 12, 0, 0, }, /* 778 */
+ { 132, 7, 12, 0, 0, }, /* 779 */
+ { 132, 10, 5, 0, 0, }, /* 780 */
+ { 132, 12, 3, 0, 0, }, /* 781 */
+ { 132, 21, 12, 0, 0, }, /* 782 */
+ { 132, 13, 12, 0, 0, }, /* 783 */
+ { 132, 15, 12, 0, 0, }, /* 784 */
+ { 133, 21, 12, 0, 0, }, /* 785 */
+ { 133, 7, 12, 0, 0, }, /* 786 */
+ { 133, 12, 3, 0, 0, }, /* 787 */
+ { 133, 10, 5, 0, 0, }, /* 788 */
+ { 137, 7, 12, 0, 0, }, /* 789 */
+ { 137, 12, 3, 0, 0, }, /* 790 */
+ { 137, 7, 4, 0, 0, }, /* 791 */
+ { 137, 13, 12, 0, 0, }, /* 792 */
+ { 142, 7, 12, 0, 0, }, /* 793 */
+ { 142, 10, 5, 0, 0, }, /* 794 */
+ { 142, 12, 3, 0, 0, }, /* 795 */
+ { 142, 13, 12, 0, 0, }, /* 796 */
+ { 144, 7, 12, 0, 0, }, /* 797 */
+ { 144, 12, 3, 0, 0, }, /* 798 */
+ { 144, 10, 5, 0, 0, }, /* 799 */
+ { 144, 21, 12, 0, 0, }, /* 800 */
+ { 62, 7, 12, 0, 0, }, /* 801 */
+ { 62, 14, 12, 0, 0, }, /* 802 */
+ { 62, 21, 12, 0, 0, }, /* 803 */
+ { 79, 7, 12, 0, 0, }, /* 804 */
+ { 126, 7, 12, 0, 0, }, /* 805 */
+ { 114, 7, 12, 0, 0, }, /* 806 */
+ { 114, 13, 12, 0, 0, }, /* 807 */
+ { 114, 21, 12, 0, 0, }, /* 808 */
+ { 102, 7, 12, 0, 0, }, /* 809 */
+ { 102, 12, 3, 0, 0, }, /* 810 */
+ { 102, 21, 12, 0, 0, }, /* 811 */
+ { 118, 7, 12, 0, 0, }, /* 812 */
+ { 118, 12, 3, 0, 0, }, /* 813 */
+ { 118, 21, 12, 0, 0, }, /* 814 */
+ { 118, 26, 12, 0, 0, }, /* 815 */
+ { 118, 6, 12, 0, 0, }, /* 816 */
+ { 118, 13, 12, 0, 0, }, /* 817 */
+ { 118, 15, 12, 0, 0, }, /* 818 */
+ { 145, 9, 12, 0, 32, }, /* 819 */
+ { 145, 5, 12, 0, -32, }, /* 820 */
+ { 145, 15, 12, 0, 0, }, /* 821 */
+ { 145, 21, 12, 0, 0, }, /* 822 */
+ { 98, 7, 12, 0, 0, }, /* 823 */
+ { 98, 10, 5, 0, 0, }, /* 824 */
+ { 98, 12, 3, 0, 0, }, /* 825 */
+ { 98, 6, 12, 0, 0, }, /* 826 */
+ { 136, 6, 12, 0, 0, }, /* 827 */
+ { 138, 6, 12, 0, 0, }, /* 828 */
+ { 136, 7, 12, 0, 0, }, /* 829 */
+ { 138, 7, 12, 0, 0, }, /* 830 */
+ { 104, 7, 12, 0, 0, }, /* 831 */
+ { 104, 26, 12, 0, 0, }, /* 832 */
+ { 104, 12, 3, 0, 0, }, /* 833 */
+ { 104, 21, 12, 0, 0, }, /* 834 */
+ { 9, 10, 3, 0, 0, }, /* 835 */
+ { 19, 12, 3, 0, 0, }, /* 836 */
+ { 130, 26, 12, 0, 0, }, /* 837 */
+ { 130, 12, 3, 0, 0, }, /* 838 */
+ { 130, 21, 12, 0, 0, }, /* 839 */
+ { 17, 12, 3, 0, 0, }, /* 840 */
+ { 112, 7, 12, 0, 0, }, /* 841 */
+ { 112, 15, 12, 0, 0, }, /* 842 */
+ { 112, 12, 3, 0, 0, }, /* 843 */
+ { 131, 9, 12, 0, 34, }, /* 844 */
+ { 131, 5, 12, 0, -34, }, /* 845 */
+ { 131, 12, 3, 0, 0, }, /* 846 */
+ { 131, 13, 12, 0, 0, }, /* 847 */
+ { 131, 21, 12, 0, 0, }, /* 848 */
+ { 9, 2, 14, 0, 0, }, /* 849 */
+ { 9, 26, 11, 0, 0, }, /* 850 */
+ { 26, 26, 12, 0, 0, }, /* 851 */
+ { 9, 24, 3, 0, 0, }, /* 852 */
+ { 9, 1, 3, 0, 0, }, /* 853 */
};
const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
@@ -1123,3226 +968,3216 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, /* U+1800 */
62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, /* U+2000 */
77, 77, 78, 79, 66, 66, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, /* U+2800 */
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 98, 98, 98, 98, 98, 98, 98, /* U+3000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+3800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+4000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 99, 98, 98, 98, 98, /* U+4800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+5000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+5800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+6000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+6800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+7000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+7800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+8000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+8800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+9000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,100, /* U+9800 */
-101,102,102,102,102,102,102,102,102,103,104,104,105,106,107,108, /* U+A000 */
-109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,117, /* U+A800 */
-118,119,120,121,122,123,117,118,119,120,121,122,123,117,118,119, /* U+B000 */
-120,121,122,123,117,118,119,120,121,122,123,117,118,119,120,121, /* U+B800 */
-122,123,117,118,119,120,121,122,123,117,118,119,120,121,122,123, /* U+C000 */
-117,118,119,120,121,122,123,117,118,119,120,121,122,123,117,118, /* U+C800 */
-119,120,121,122,123,117,118,119,120,121,122,123,117,118,119,124, /* U+D000 */
-125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+D800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+E000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+E800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F000 */
-126,126, 98, 98,127,128,129,130,131,131,132,133,134,135,136,137, /* U+F800 */
-138,139,140,141,142,143,144,145,146,147,148,142,149,149,150,142, /* U+10000 */
-151,152,153,154,155,156,157,158,159,160,161,142,162,142,163,142, /* U+10800 */
-164,165,166,167,168,169,170,142,171,172,142,173,174,175,176,142, /* U+11000 */
-177,178,142,142,179,180,142,142,181,182,183,184,142,185,142,142, /* U+11800 */
-186,186,186,186,186,186,186,187,188,186,189,142,142,142,142,142, /* U+12000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+12800 */
-190,190,190,190,190,190,190,190,191,142,142,142,142,142,142,142, /* U+13000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+13800 */
-142,142,142,142,142,142,142,142,192,192,192,192,193,142,142,142, /* U+14000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+14800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+15000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+15800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+16000 */
-194,194,194,194,195,196,197,198,142,142,142,142,199,200,201,202, /* U+16800 */
-203,203,203,203,203,203,203,203,203,203,203,203,203,203,203,203, /* U+17000 */
-203,203,203,203,203,203,203,203,203,203,203,203,203,203,203,203, /* U+17800 */
-203,203,203,203,203,203,203,203,203,203,203,203,203,203,203,204, /* U+18000 */
-203,203,203,203,203,205,142,142,142,142,142,142,142,142,142,142, /* U+18800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+19000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+19800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1A800 */
-206,207,208,209,209,210,142,142,142,142,142,142,142,142,142,142, /* U+1B000 */
-142,142,142,142,142,142,142,142,211,212,142,142,142,142,142,142, /* U+1B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1C800 */
- 71,213,214,215,216,217,218,142,219,220,221,222,223,224,225,226, /* U+1D000 */
-227,227,227,227,228,229,142,142,142,142,142,142,142,142,142,142, /* U+1D800 */
-230,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1E000 */
-231,232,233,142,142,142,142,142,234,235,142,142,236,237,142,142, /* U+1E800 */
-238,239,240,241,242,243,244,245,244,244,246,244,247,248,249,250, /* U+1F000 */
-251,252,253,254,255,243,243,243,243,243,243,243,243,243,243,256, /* U+1F800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+20000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+20800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+21000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+21800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+22000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+22800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+23000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+23800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+24000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+24800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+25000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+25800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+26000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+26800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+27000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+27800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+28000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+28800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+29000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+29800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,257, 98, 98, /* U+2A000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2A800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,258, 98, /* U+2B000 */
-259, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2B800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2C000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,260, 98, 98, /* U+2C800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2D000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2D800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2E000 */
- 98, 98, 98, 98, 98, 98, 98,261,142,142,142,142,142,142,142,142, /* U+2E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+2F000 */
- 98, 98, 98, 98,262,142,142,142,142,142,142,142,142,142,142,142, /* U+2F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+30000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+30800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+31000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+31800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+32000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+32800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+33000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+33800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+34000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+34800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+35000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+35800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+36000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+36800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+37000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+37800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+38000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+38800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+39000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+39800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+40000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+40800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+41000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+41800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+42000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+42800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+43000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+43800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+44000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+44800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+45000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+45800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+46000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+46800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+47000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+47800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+48000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+48800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+49000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+49800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+50000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+50800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+51000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+51800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+52000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+52800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+53000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+53800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+54000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+54800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+55000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+55800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+56000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+56800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+57000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+57800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+58000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+58800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+59000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+59800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+60000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+60800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+61000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+61800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+62000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+62800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+63000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+63800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+64000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+64800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+65000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+65800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+66000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+66800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+67000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+67800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+68000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+68800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+69000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+69800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+70000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+70800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+71000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+71800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+72000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+72800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+73000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+73800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+74000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+74800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+75000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+75800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+76000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+76800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+77000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+77800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+78000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+78800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+79000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+79800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+80000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+80800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+81000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+81800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+82000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+82800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+83000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+83800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+84000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+84800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+85000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+85800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+86000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+86800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+87000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+87800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+88000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+88800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+89000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+89800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+90000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+90800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+91000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+91800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+92000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+92800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+93000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+93800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+94000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+94800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+95000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+95800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+96000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+96800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+97000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+97800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+98000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+98800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+99000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+99800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9A000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9A800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9B000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9B800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9C000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9C800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9D000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9D800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9E000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9E800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9F000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A0000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A0800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A1000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A1800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A2000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A2800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A3000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A3800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A4000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A4800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A5000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A5800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A6000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A6800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A7000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A7800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A8000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A8800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A9000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A9800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AA000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AA800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AB000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AB800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AC000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AC800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AD000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AD800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AE000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AE800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AF000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AF800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B0000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B0800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B1000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B1800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B2000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B2800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B3000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B3800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B4000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B4800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B5000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B5800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B6000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B6800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B7000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B7800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B8000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B8800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B9000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B9800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BA000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BA800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BB000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BB800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BC000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BC800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BD000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BD800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BE000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BE800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BF000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BF800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C0000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C0800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C1000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C1800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C2000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C2800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C3000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C3800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C4000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C4800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C5000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C5800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C6000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C6800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C7000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C7800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C8000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C8800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C9000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C9800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CA000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CA800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CB000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CB800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CC000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CC800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CD000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CD800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CE000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CE800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CF000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CF800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D0000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D0800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D1000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D1800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D2000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D2800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D3000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D3800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D4000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D4800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D5000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D5800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D6000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D6800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D7000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D7800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D8000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D8800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D9000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D9800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DA000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DA800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DB000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DB800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DC000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DC800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DD000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DD800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DE000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DE800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DF000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DF800 */
-263,264,265,266,264,264,264,264,264,264,264,264,264,264,264,264, /* U+E0000 */
-264,264,264,264,264,264,264,264,264,264,264,264,264,264,264,264, /* U+E0800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E1000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E1800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E2000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E2800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E3000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E3800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E4000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E4800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E5000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E5800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E6000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E6800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E7000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E7800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E8000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E8800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E9000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E9800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EA000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EA800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EB000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EB800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EC000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EC800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+ED000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+ED800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EE000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EE800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EF000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EF800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F0000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F0800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F1000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F1800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F2000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F2800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F3000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F3800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F4000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F4800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F5000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F5800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F6000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F6800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F7000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F7800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F8000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F8800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F9000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F9800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FA000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FA800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FB000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FB800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FC000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FC800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FD000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FD800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FE000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FE800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FF000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,267, /* U+FF800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+100000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+100800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+101000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+101800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+102000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+102800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+103000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+103800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+104000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+104800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+105000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+105800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+106000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+106800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+107000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+107800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+108000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+108800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+109000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+109800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10A000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10A800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10B000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10B800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10C000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10C800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10D000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10D800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10E000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10E800 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10F000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,267, /* U+10F800 */
+ 90, 91, 92, 93, 94, 95, 96, 71, 97, 97, 97, 97, 97, 97, 97, 97, /* U+3000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+3800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+4000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 98, 97, 97, 97, 97, /* U+4800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+5000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+5800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+6000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+6800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+7000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+7800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+8000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+8800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+9000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 99, /* U+9800 */
+100,101,101,101,101,101,101,101,101,102,103,103,104,105,106,107, /* U+A000 */
+108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,116, /* U+A800 */
+117,118,119,120,121,122,116,117,118,119,120,121,122,116,117,118, /* U+B000 */
+119,120,121,122,116,117,118,119,120,121,122,116,117,118,119,120, /* U+B800 */
+121,122,116,117,118,119,120,121,122,116,117,118,119,120,121,122, /* U+C000 */
+116,117,118,119,120,121,122,116,117,118,119,120,121,122,116,117, /* U+C800 */
+118,119,120,121,122,116,117,118,119,120,121,122,116,117,118,123, /* U+D000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+D800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+E000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+E800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F000 */
+125,125, 97, 97,126,127,128,129,130,130,131,132,133,134,135,136, /* U+F800 */
+137,138,139,140,141,142,143,144,145,146,147,141,148,148,149,141, /* U+10000 */
+150,151,152,153,154,155,156,157,158,159,160,141,161,141,162,141, /* U+10800 */
+163,164,165,166,167,168,169,141,170,171,141,172,173,174,175,141, /* U+11000 */
+176,177,141,141,178,179,141,141,180,181,182,183,141,184,141,141, /* U+11800 */
+185,185,185,185,185,185,185,186,187,185,188,141,141,141,141,141, /* U+12000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+12800 */
+189,189,189,189,189,189,189,189,190,141,141,141,141,141,141,141, /* U+13000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+13800 */
+141,141,141,141,141,141,141,141,191,191,191,191,192,141,141,141, /* U+14000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+14800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+15000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+15800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+16000 */
+193,193,193,193,194,195,196,197,141,141,141,141,198,199,200,201, /* U+16800 */
+202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,202, /* U+17000 */
+202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,202, /* U+17800 */
+202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,203, /* U+18000 */
+202,202,202,202,202,204,141,141,141,141,141,141,141,141,141,141, /* U+18800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+19000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+19800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1A800 */
+205,206,207,208,208,209,141,141,141,141,141,141,141,141,141,141, /* U+1B000 */
+141,141,141,141,141,141,141,141,210,211,141,141,141,141,141,141, /* U+1B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1C800 */
+ 71,212,213,214,215,216,217,141,218,219,220,221,222,223,224,225, /* U+1D000 */
+226,226,226,226,227,228,141,141,141,141,141,141,141,141,141,141, /* U+1D800 */
+229,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1E000 */
+230,231,232,141,141,141,141,141,233,234,141,141,235,236,141,141, /* U+1E800 */
+237,238,239,240,241,242,243,244,243,243,245,243,246,247,248,249, /* U+1F000 */
+250,251,252,253,254,242,242,242,242,242,242,242,242,242,242,255, /* U+1F800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+20000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+20800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+21000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+21800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+22000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+22800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+23000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+23800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+24000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+24800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+25000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+25800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+26000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+26800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+27000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+27800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+28000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+28800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+29000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+29800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,256, 97, 97, /* U+2A000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2A800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,257, 97, /* U+2B000 */
+258, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2B800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2C000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,259, 97, 97, /* U+2C800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2D000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2D800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2E000 */
+ 97, 97, 97, 97, 97, 97, 97,260,141,141,141,141,141,141,141,141, /* U+2E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+2F000 */
+ 97, 97, 97, 97,261,141,141,141,141,141,141,141,141,141,141,141, /* U+2F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+30000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+30800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+31000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+31800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+32000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+32800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+33000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+33800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+34000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+34800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+35000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+35800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+36000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+36800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+37000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+37800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+38000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+38800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+39000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+39800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+40000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+40800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+41000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+41800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+42000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+42800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+43000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+43800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+44000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+44800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+45000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+45800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+46000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+46800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+47000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+47800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+48000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+48800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+49000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+49800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+50000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+50800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+51000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+51800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+52000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+52800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+53000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+53800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+54000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+54800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+55000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+55800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+56000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+56800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+57000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+57800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+58000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+58800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+59000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+59800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+60000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+60800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+61000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+61800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+62000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+62800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+63000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+63800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+64000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+64800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+65000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+65800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+66000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+66800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+67000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+67800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+68000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+68800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+69000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+69800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+70000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+70800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+71000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+71800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+72000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+72800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+73000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+73800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+74000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+74800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+75000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+75800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+76000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+76800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+77000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+77800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+78000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+78800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+79000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+79800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+80000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+80800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+81000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+81800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+82000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+82800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+83000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+83800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+84000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+84800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+85000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+85800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+86000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+86800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+87000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+87800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+88000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+88800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+89000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+89800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+90000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+90800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+91000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+91800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+92000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+92800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+93000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+93800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+94000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+94800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+95000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+95800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+96000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+96800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+97000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+97800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+98000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+98800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+99000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+99800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AF800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BF800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CF800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DF800 */
+262,263,264,265,263,263,263,263,263,263,263,263,263,263,263,263, /* U+E0000 */
+263,263,263,263,263,263,263,263,263,263,263,263,263,263,263,263, /* U+E0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+ED000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+ED800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EF800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F0000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F0800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F1000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F1800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F2000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F2800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F3000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F3800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F4000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F4800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F5000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F5800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F6000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F6800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F7000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F7800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F8000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F8800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F9000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F9800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FA000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FA800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FB000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FB800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FC000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FC800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FD000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FD800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FE000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FE800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FF000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,266, /* U+FF800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+100000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+100800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+101000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+101800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+102000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+102800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+103000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+103800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+104000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+104800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+105000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+105800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+106000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+106800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+107000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+107800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+108000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+108800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+109000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+109800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10A000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10A800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10B000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10B800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10C000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10C800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10D000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10D800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10E000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10E800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10F000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,266, /* U+10F800 */
};
-const uint16_t PRIV(ucd_stage2)[] = { /* 68608 bytes, block = 128 */
+const uint16_t PRIV(ucd_stage2)[] = { /* 68352 bytes, block = 128 */
/* block 0 */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 4, 5, 5, 5, 6, 5, 5, 5, 7, 8, 5, 9, 5, 10, 5, 5,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 9, 9, 9, 5,
- 5, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12, 12, 12, 12,
- 12, 12, 12, 14, 12, 12, 12, 12, 12, 12, 12, 7, 5, 8, 15, 16,
- 15, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 17, 17, 17, 17,
- 17, 17, 17, 19, 17, 17, 17, 17, 17, 17, 17, 7, 9, 8, 9, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 3, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 4, 4, 8, 8, 8, 4,
+ 4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 11,
+ 11, 11, 11, 13, 11, 11, 11, 11, 11, 11, 11, 6, 4, 7, 14, 15,
+ 14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 16, 16, 16, 16,
+ 16, 16, 16, 18, 16, 16, 16, 16, 16, 16, 16, 6, 8, 7, 8, 0,
/* block 1 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 4, 5, 6, 6, 6, 6, 20, 5, 15, 21, 22, 23, 9, 24, 21, 15,
- 20, 9, 25, 25, 15, 26, 5, 5, 15, 25, 22, 27, 25, 25, 25, 5,
- 12, 12, 12, 12, 12, 28, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 9, 12, 12, 12, 12, 12, 12, 12, 29,
- 17, 17, 17, 17, 17, 30, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 9, 17, 17, 17, 17, 17, 17, 17, 31,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 3, 4, 5, 5, 5, 5, 19, 4, 14, 20, 21, 22, 8, 23, 20, 14,
+ 19, 8, 24, 24, 14, 25, 4, 4, 14, 24, 21, 26, 24, 24, 24, 4,
+ 11, 11, 11, 11, 11, 27, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 8, 11, 11, 11, 11, 11, 11, 11, 28,
+ 16, 16, 16, 16, 16, 29, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 8, 16, 16, 16, 16, 16, 16, 16, 30,
/* block 2 */
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 34, 35, 32, 33, 32, 33, 32, 33, 35, 32, 33, 32, 33, 32, 33, 32,
- 33, 32, 33, 32, 33, 32, 33, 32, 33, 35, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 36, 32, 33, 32, 33, 32, 33, 37,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 33, 34, 31, 32, 31, 32, 31, 32, 34, 31, 32, 31, 32, 31, 32, 31,
+ 32, 31, 32, 31, 32, 31, 32, 31, 32, 34, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 35, 31, 32, 31, 32, 31, 32, 36,
/* block 3 */
- 38, 39, 32, 33, 32, 33, 40, 32, 33, 41, 41, 32, 33, 35, 42, 43,
- 44, 32, 33, 41, 45, 46, 47, 48, 32, 33, 49, 35, 47, 50, 51, 52,
- 32, 33, 32, 33, 32, 33, 53, 32, 33, 53, 35, 35, 32, 33, 53, 32,
- 33, 54, 54, 32, 33, 32, 33, 55, 32, 33, 35, 22, 32, 33, 35, 56,
- 22, 22, 22, 22, 57, 58, 59, 60, 61, 62, 63, 64, 65, 32, 33, 32,
- 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 66, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 35, 67, 68, 69, 32, 33, 70, 71, 32, 33, 32, 33, 32, 33, 32, 33,
+ 37, 38, 31, 32, 31, 32, 39, 31, 32, 40, 40, 31, 32, 34, 41, 42,
+ 43, 31, 32, 40, 44, 45, 46, 47, 31, 32, 48, 34, 46, 49, 50, 51,
+ 31, 32, 31, 32, 31, 32, 52, 31, 32, 52, 34, 34, 31, 32, 52, 31,
+ 32, 53, 53, 31, 32, 31, 32, 54, 31, 32, 34, 21, 31, 32, 34, 55,
+ 21, 21, 21, 21, 56, 57, 58, 59, 60, 61, 62, 63, 64, 31, 32, 31,
+ 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 65, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 34, 66, 67, 68, 31, 32, 69, 70, 31, 32, 31, 32, 31, 32, 31, 32,
/* block 4 */
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 72, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 35, 35, 35, 35, 35, 35, 73, 32, 33, 74, 75, 76,
- 76, 32, 33, 77, 78, 79, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 80, 81, 82, 83, 84, 35, 85, 85, 35, 86, 35, 87, 88, 35, 35, 35,
- 85, 89, 35, 90, 35, 91, 92, 35, 93, 94, 92, 95, 96, 35, 35, 94,
- 35, 97, 98, 35, 35, 99, 35, 35, 35, 35, 35, 35, 35,100, 35, 35,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 71, 34, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 34, 34, 34, 34, 34, 34, 72, 31, 32, 73, 74, 75,
+ 75, 31, 32, 76, 77, 78, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 79, 80, 81, 82, 83, 34, 84, 84, 34, 85, 34, 86, 87, 34, 34, 34,
+ 84, 88, 34, 89, 34, 90, 91, 34, 92, 93, 91, 94, 95, 34, 34, 93,
+ 34, 96, 97, 34, 34, 98, 34, 34, 34, 34, 34, 34, 34, 99, 34, 34,
/* block 5 */
-101, 35, 35,101, 35, 35, 35,102,101,103,104,104,105, 35, 35, 35,
- 35, 35,106, 35, 22, 35, 35, 35, 35, 35, 35, 35, 35,107,108, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
-109,109,109,109,109,109,109,109,109,110,110,110,110,110,110,110,
-110,110, 15, 15, 15, 15,110,110,110,110,110,110,110,110,110,110,
-110,110, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-109,109,109,109,109, 15, 15, 15, 15, 15,111,111,110, 15,110, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+100, 34, 34,100, 34, 34, 34,101,100,102,103,103,104, 34, 34, 34,
+ 34, 34,105, 34, 21, 34, 34, 34, 34, 34, 34, 34, 34,106,107, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+108,108,108,108,108,108,108,108,108,109,109,109,109,109,109,109,
+109,109, 14, 14, 14, 14,109,109,109,109,109,109,109,109,109,109,
+109,109, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+108,108,108,108,108, 14, 14, 14, 14, 14,110,110,109, 14,109, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
/* block 6 */
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,113,112,112,114,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,115,115,115,115,115,115,115,115,115,115,115,115,115,
-116,117,116,117,110,118,116,117,119,119,120,121,121,121, 5,122,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,112,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+113,114,113,114,109,115,113,114,116,116,117,118,118,118, 4,119,
/* block 7 */
-119,119,119,119,118, 15,123, 5,124,124,124,119,125,119,126,126,
-127,128,129,128,128,130,128,128,131,132,133,128,134,128,128,128,
-135,136,119,137,128,128,138,128,128,139,128,128,140,141,141,141,
-127,142,143,142,142,144,142,142,145,146,147,142,148,142,142,142,
-149,150,151,152,142,142,153,142,142,154,142,142,155,156,156,157,
-158,159,160,160,160,161,162,163,116,117,116,117,116,117,116,117,
-116,117,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-166,167,168,169,170,171,172,116,117,173,116,117,127,174,174,174,
+116,116,116,116,115, 14,120, 4,121,121,121,116,122,116,123,123,
+124,125,126,125,125,127,125,125,128,129,130,125,131,125,125,125,
+132,133,116,134,125,125,135,125,125,136,125,125,137,138,138,138,
+124,139,140,139,139,141,139,139,142,143,144,139,145,139,139,139,
+146,147,148,149,139,139,150,139,139,151,139,139,152,153,153,154,
+155,156,157,157,157,158,159,160,113,114,113,114,113,114,113,114,
+113,114,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+163,164,165,166,167,168,169,113,114,170,113,114,124,171,171,171,
/* block 8 */
-175,175,175,175,175,175,175,175,175,175,175,175,175,175,175,175,
-176,176,177,176,178,176,176,176,176,176,176,176,176,176,179,176,
-176,180,181,176,176,176,176,176,176,176,182,176,176,176,176,176,
-183,183,184,183,185,183,183,183,183,183,183,183,183,183,186,183,
-183,187,188,183,183,183,183,183,183,183,189,183,183,183,183,183,
-190,190,190,190,190,190,190,190,190,190,190,190,190,190,190,190,
-191,192,193,194,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
+172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,
+173,173,174,173,175,173,173,173,173,173,173,173,173,173,176,173,
+173,177,178,173,173,173,173,173,173,173,179,173,173,173,173,173,
+180,180,181,180,182,180,180,180,180,180,180,180,180,180,183,180,
+180,184,185,180,180,180,180,180,180,180,186,180,180,180,180,180,
+187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,
+188,189,190,191,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
/* block 9 */
-191,192,195,196,197,198,198,197,199,199,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-200,191,192,191,192,191,192,191,192,191,192,191,192,191,192,201,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
+188,189,192,193,193,111,111,193,194,194,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+195,188,189,188,189,188,189,188,189,188,189,188,189,188,189,196,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
/* block 10 */
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-119,202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,
-202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,
-202,202,202,202,202,202,202,119,119,203,204,204,204,204,204,204,
-205,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,
-206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+116,197,197,197,197,197,197,197,197,197,197,197,197,197,197,197,
+197,197,197,197,197,197,197,197,197,197,197,197,197,197,197,197,
+197,197,197,197,197,197,197,116,116,198,199,199,199,199,199,199,
+200,201,201,201,201,201,201,201,201,201,201,201,201,201,201,201,
+201,201,201,201,201,201,201,201,201,201,201,201,201,201,201,201,
/* block 11 */
-206,206,206,206,206,206,206,205,205,207,208,119,119,209,209,210,
-119,211,211,211,211,211,211,211,211,211,211,211,211,211,211,211,
-211,211,211,211,211,211,211,211,211,211,211,211,211,211,211,211,
-211,211,211,211,211,211,211,211,211,211,211,211,211,211,212,211,
-213,211,211,213,211,211,213,211,119,119,119,119,119,119,119,119,
-214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,
-214,214,214,214,214,214,214,214,214,214,214,119,119,119,119,214,
-214,214,214,213,213,119,119,119,119,119,119,119,119,119,119,119,
+201,201,201,201,201,201,201,200,200, 4,202,116,116,203,203,204,
+116,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,
+205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,
+205,205,205,205,205,205,205,205,205,205,205,205,205,205,206,205,
+207,205,205,207,205,205,207,205,116,116,116,116,116,116,116,116,
+208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,
+208,208,208,208,208,208,208,208,208,208,208,116,116,116,116,208,
+208,208,208,207,207,116,116,116,116,116,116,116,116,116,116,116,
/* block 12 */
-215,215,215,215,215,216,217,217,217,218,218,219,220,218,221,221,
-222,222,222,222,222,222,222,222,222,222,222,220,223,119,218,220,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-225,224,224,224,224,224,224,224,224,224,224,226,226,226,226,226,
-226,226,226,226,226,226,222,222,222,222,222,222,222,222,222,222,
-227,227,227,227,227,227,227,227,227,227,218,218,218,218,224,224,
-226,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+209,209,209,209,209,210,211,211,211,212,212,213, 4,212,214,214,
+215,215,215,215,215,215,215,215,215,215,215, 4,216,116,212, 4,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+109,217,217,217,217,217,217,217,217,217,217,111,111,111,111,111,
+111,111,111,111,111,111,215,215,215,215,215,215,215,215,215,215,
+218,218,218,218,218,218,218,218,218,218,212,212,212,212,217,217,
+111,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 13 */
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,228,224,222,222,222,222,222,222,222,216,221,222,
-222,222,222,222,222,229,229,222,222,221,222,222,222,222,224,224,
-230,230,230,230,230,230,230,230,230,230,224,224,224,221,221,224,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,212,217,215,215,215,215,215,215,215,210,214,215,
+215,215,215,215,215,219,219,215,215,214,215,215,215,215,217,217,
+218,218,218,218,218,218,218,218,218,218,217,217,217,214,214,217,
/* block 14 */
-231,231,231,231,231,231,231,231,231,231,231,231,231,231,119,232,
-233,234,233,233,233,233,233,233,233,233,233,233,233,233,233,233,
-233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,
-234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,
-234,234,234,234,234,234,234,234,234,234,234,119,119,233,233,233,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+220,220,220,220,220,220,220,220,220,220,220,220,220,220,116,221,
+222,223,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
+222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
+223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
+223,223,223,223,223,223,223,223,223,223,223,116,116,222,222,222,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 15 */
-235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
-235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
-235,235,235,235,235,235,236,236,236,236,236,236,236,236,236,236,
-236,235,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-237,237,237,237,237,237,237,237,237,237,238,238,238,238,238,238,
-238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,
-238,238,238,238,238,238,238,238,238,238,238,239,239,239,239,239,
-239,239,239,239,240,240,241,242,242,242,240,119,119,239,243,243,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,225,225,225,225,225,225,225,225,225,225,
+225,224,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+226,226,226,226,226,226,226,226,226,226,227,227,227,227,227,227,
+227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,
+227,227,227,227,227,227,227,227,227,227,227,228,228,228,228,228,
+228,228,228,228,229,229,230,231,231,231,229,116,116,228,232,232,
/* block 16 */
-244,244,244,244,244,244,244,244,244,244,244,244,244,244,244,244,
-244,244,244,244,244,244,245,245,245,245,246,245,245,245,245,245,
-245,245,245,245,246,245,245,245,246,245,245,245,245,245,119,119,
-247,247,247,247,247,247,247,247,247,247,247,247,247,247,247,119,
-248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,
-248,248,248,248,248,248,248,248,248,249,249,249,119,119,250,119,
-233,233,233,233,233,233,233,233,233,233,233,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,
+233,233,233,233,233,233,234,234,234,234,235,234,234,234,234,234,
+234,234,234,234,235,234,234,234,235,234,234,234,234,234,116,116,
+236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,116,
+237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,
+237,237,237,237,237,237,237,237,237,238,238,238,116,116,239,116,
+222,222,222,222,222,222,222,222,222,222,222,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 17 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,119,224,224,224,224,224,224,224,224,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,222,222,222,222,222,222,222,222,222,222,222,222,222,
-222,222,216,222,222,222,222,222,222,222,222,222,222,222,222,222,
-222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,116,217,217,217,217,217,217,217,217,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,215,215,215,215,215,215,215,215,215,215,215,215,215,
+215,215,210,215,215,215,215,215,215,215,215,215,215,215,215,215,
+215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,
/* block 18 */
-251,251,251,252,253,253,253,253,253,253,253,253,253,253,253,253,
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,
-253,253,253,253,253,253,253,253,253,253,251,252,251,253,252,252,
-252,251,251,251,251,251,251,251,251,252,252,252,252,251,252,252,
-253,254,255,251,251,251,251,251,253,253,253,253,253,253,253,253,
-253,253,251,251,256,257,258,258,258,258,258,258,258,258,258,258,
-259,260,253,253,253,253,253,253,253,253,253,253,253,253,253,253,
+240,240,240,241,242,242,242,242,242,242,242,242,242,242,242,242,
+242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,
+242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,
+242,242,242,242,242,242,242,242,242,242,240,241,240,242,241,241,
+241,240,240,240,240,240,240,240,240,241,241,241,241,240,241,241,
+242,111,111,240,240,240,240,240,242,242,242,242,242,242,242,242,
+242,242,240,240, 4, 4,243,243,243,243,243,243,243,243,243,243,
+244,245,242,242,242,242,242,242,242,242,242,242,242,242,242,242,
/* block 19 */
-261,262,263,263,119,261,261,261,261,261,261,261,261,119,119,261,
-261,119,119,261,261,261,261,261,261,261,261,261,261,261,261,261,
-261,261,261,261,261,261,261,261,261,119,261,261,261,261,261,261,
-261,119,261,119,119,119,261,261,261,261,119,119,262,261,264,263,
-263,262,262,262,262,119,119,263,263,119,119,263,263,262,261,119,
-119,119,119,119,119,119,119,264,119,119,119,119,261,261,119,261,
-261,261,262,262,119,119,265,265,265,265,265,265,265,265,265,265,
-261,261,266,266,267,267,267,267,267,267,268,266,261,269,262,119,
+246,247,248,248,116,246,246,246,246,246,246,246,246,116,116,246,
+246,116,116,246,246,246,246,246,246,246,246,246,246,246,246,246,
+246,246,246,246,246,246,246,246,246,116,246,246,246,246,246,246,
+246,116,246,116,116,116,246,246,246,246,116,116,247,246,249,248,
+248,247,247,247,247,116,116,248,248,116,116,248,248,247,246,116,
+116,116,116,116,116,116,116,249,116,116,116,116,246,246,116,246,
+246,246,247,247,116,116,250,250,250,250,250,250,250,250,250,250,
+246,246,251,251,252,252,252,252,252,252,253,251,246,254,247,116,
/* block 20 */
-119,270,270,271,119,272,272,272,272,272,272,119,119,119,119,272,
-272,119,119,272,272,272,272,272,272,272,272,272,272,272,272,272,
-272,272,272,272,272,272,272,272,272,119,272,272,272,272,272,272,
-272,119,272,272,119,272,272,119,272,272,119,119,270,119,271,271,
-271,270,270,119,119,119,119,270,270,119,119,270,270,270,119,119,
-119,270,119,119,119,119,119,119,119,272,272,272,272,119,272,119,
-119,119,119,119,119,119,273,273,273,273,273,273,273,273,273,273,
-270,270,272,272,272,270,274,119,119,119,119,119,119,119,119,119,
+116,255,255,256,116,257,257,257,257,257,257,116,116,116,116,257,
+257,116,116,257,257,257,257,257,257,257,257,257,257,257,257,257,
+257,257,257,257,257,257,257,257,257,116,257,257,257,257,257,257,
+257,116,257,257,116,257,257,116,257,257,116,116,255,116,256,256,
+256,255,255,116,116,116,116,255,255,116,116,255,255,255,116,116,
+116,255,116,116,116,116,116,116,116,257,257,257,257,116,257,116,
+116,116,116,116,116,116,258,258,258,258,258,258,258,258,258,258,
+255,255,257,257,257,255,259,116,116,116,116,116,116,116,116,116,
/* block 21 */
-119,275,275,276,119,277,277,277,277,277,277,277,277,277,119,277,
-277,277,119,277,277,277,277,277,277,277,277,277,277,277,277,277,
-277,277,277,277,277,277,277,277,277,119,277,277,277,277,277,277,
-277,119,277,277,119,277,277,277,277,277,119,119,275,277,276,276,
-276,275,275,275,275,275,119,275,275,276,119,276,276,275,119,119,
-277,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-277,277,275,275,119,119,278,278,278,278,278,278,278,278,278,278,
-279,280,119,119,119,119,119,119,119,277,275,275,275,275,275,275,
+116,260,260,261,116,262,262,262,262,262,262,262,262,262,116,262,
+262,262,116,262,262,262,262,262,262,262,262,262,262,262,262,262,
+262,262,262,262,262,262,262,262,262,116,262,262,262,262,262,262,
+262,116,262,262,116,262,262,262,262,262,116,116,260,262,261,261,
+261,260,260,260,260,260,116,260,260,261,116,261,261,260,116,116,
+262,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+262,262,260,260,116,116,263,263,263,263,263,263,263,263,263,263,
+264,265,116,116,116,116,116,116,116,262,260,260,260,260,260,260,
/* block 22 */
-119,281,282,282,119,283,283,283,283,283,283,283,283,119,119,283,
-283,119,119,283,283,283,283,283,283,283,283,283,283,283,283,283,
-283,283,283,283,283,283,283,283,283,119,283,283,283,283,283,283,
-283,119,283,283,119,283,283,283,283,283,119,119,281,283,284,281,
-282,281,281,281,281,119,119,282,282,119,119,282,282,281,119,119,
-119,119,119,119,119,119,281,284,119,119,119,119,283,283,119,283,
-283,283,281,281,119,119,285,285,285,285,285,285,285,285,285,285,
-286,283,287,287,287,287,287,287,119,119,119,119,119,119,119,119,
+116,266,267,267,116,268,268,268,268,268,268,268,268,116,116,268,
+268,116,116,268,268,268,268,268,268,268,268,268,268,268,268,268,
+268,268,268,268,268,268,268,268,268,116,268,268,268,268,268,268,
+268,116,268,268,116,268,268,268,268,268,116,116,266,268,269,266,
+267,266,266,266,266,116,116,267,267,116,116,267,267,266,116,116,
+116,116,116,116,116,116,266,269,116,116,116,116,268,268,116,268,
+268,268,266,266,116,116,270,270,270,270,270,270,270,270,270,270,
+271,268,272,272,272,272,272,272,116,116,116,116,116,116,116,116,
/* block 23 */
-119,119,288,289,119,289,289,289,289,289,289,119,119,119,289,289,
-289,119,289,289,289,289,119,119,119,289,289,119,289,119,289,289,
-119,119,119,289,289,119,119,119,289,289,289,119,119,119,289,289,
-289,289,289,289,289,289,289,289,289,289,119,119,119,119,290,291,
-288,291,291,119,119,119,291,291,291,119,291,291,291,288,119,119,
-289,119,119,119,119,119,119,290,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,292,292,292,292,292,292,292,292,292,292,
-293,293,293,294,295,295,295,295,295,296,295,119,119,119,119,119,
+116,116,273,274,116,274,274,274,274,274,274,116,116,116,274,274,
+274,116,274,274,274,274,116,116,116,274,274,116,274,116,274,274,
+116,116,116,274,274,116,116,116,274,274,274,116,116,116,274,274,
+274,274,274,274,274,274,274,274,274,274,116,116,116,116,275,276,
+273,276,276,116,116,116,276,276,276,116,276,276,276,273,116,116,
+274,116,116,116,116,116,116,275,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,277,277,277,277,277,277,277,277,277,277,
+278,278,278,279,279,279,279,279,279,280,279,116,116,116,116,116,
/* block 24 */
-297,298,298,298,297,299,299,299,299,299,299,299,299,119,299,299,
-299,119,299,299,299,299,299,299,299,299,299,299,299,299,299,299,
-299,299,299,299,299,299,299,299,299,119,299,299,299,299,299,299,
-299,299,299,299,299,299,299,299,299,299,119,119,119,299,297,297,
-297,298,298,298,298,119,297,297,297,119,297,297,297,297,119,119,
-119,119,119,119,119,297,297,119,299,299,299,119,119,119,119,119,
-299,299,297,297,119,119,300,300,300,300,300,300,300,300,300,300,
-119,119,119,119,119,119,119,119,301,301,301,301,301,301,301,302,
+281,282,282,282,281,283,283,283,283,283,283,283,283,116,283,283,
+283,116,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
+283,283,283,283,283,283,283,283,283,116,283,283,283,283,283,283,
+283,283,283,283,283,283,283,283,283,283,116,116,116,283,281,281,
+281,282,282,282,282,116,281,281,281,116,281,281,281,281,116,116,
+116,116,116,116,116,281,281,116,283,283,283,116,116,116,116,116,
+283,283,281,281,116,116,284,284,284,284,284,284,284,284,284,284,
+116,116,116,116,116,116,116,116,285,285,285,285,285,285,285,286,
/* block 25 */
-303,304,305,305,306,303,303,303,303,303,303,303,303,119,303,303,
-303,119,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
-303,303,303,303,303,303,303,303,303,119,303,303,303,303,303,303,
-303,303,303,303,119,303,303,303,303,303,119,119,304,303,305,304,
-305,305,307,305,305,119,304,305,305,119,305,305,304,304,119,119,
-119,119,119,119,119,307,307,119,119,119,119,119,119,119,303,119,
-303,303,304,304,119,119,308,308,308,308,308,308,308,308,308,308,
-119,303,303,119,119,119,119,119,119,119,119,119,119,119,119,119,
+287,288,289,289,290,287,287,287,287,287,287,287,287,116,287,287,
+287,116,287,287,287,287,287,287,287,287,287,287,287,287,287,287,
+287,287,287,287,287,287,287,287,287,116,287,287,287,287,287,287,
+287,287,287,287,116,287,287,287,287,287,116,116,288,287,289,288,
+289,289,291,289,289,116,288,289,289,116,289,289,288,288,116,116,
+116,116,116,116,116,291,291,116,116,116,116,116,116,116,287,116,
+287,287,288,288,116,116,292,292,292,292,292,292,292,292,292,292,
+116,287,287,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 26 */
-309,309,310,310,119,311,311,311,311,311,311,311,311,119,311,311,
-311,119,311,311,311,311,311,311,311,311,311,311,311,311,311,311,
-311,311,311,311,311,311,311,311,311,311,311,311,311,311,311,311,
-311,311,311,311,311,311,311,311,311,311,311,309,309,311,312,310,
-310,309,309,309,309,119,310,310,310,119,310,310,310,309,313,314,
-119,119,119,119,311,311,311,312,315,315,315,315,315,315,315,311,
-311,311,309,309,119,119,316,316,316,316,316,316,316,316,316,316,
-315,315,315,315,315,315,315,315,315,314,311,311,311,311,311,311,
+293,293,294,294,116,295,295,295,295,295,295,295,295,116,295,295,
+295,116,295,295,295,295,295,295,295,295,295,295,295,295,295,295,
+295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,
+295,295,295,295,295,295,295,295,295,295,295,293,293,295,296,294,
+294,293,293,293,293,116,294,294,294,116,294,294,294,293,297,298,
+116,116,116,116,295,295,295,296,299,299,299,299,299,299,299,295,
+295,295,293,293,116,116,300,300,300,300,300,300,300,300,300,300,
+299,299,299,299,299,299,299,299,299,298,295,295,295,295,295,295,
/* block 27 */
-119,119,317,317,119,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,119,119,119,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,119,318,318,318,318,318,318,318,318,318,119,318,119,119,
-318,318,318,318,318,318,318,119,119,119,319,119,119,119,119,320,
-317,317,319,319,319,119,319,119,317,317,317,317,317,317,317,320,
-119,119,119,119,119,119,321,321,321,321,321,321,321,321,321,321,
-119,119,317,317,322,119,119,119,119,119,119,119,119,119,119,119,
+116,116,301,301,116,302,302,302,302,302,302,302,302,302,302,302,
+302,302,302,302,302,302,302,116,116,116,302,302,302,302,302,302,
+302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,
+302,302,116,302,302,302,302,302,302,302,302,302,116,302,116,116,
+302,302,302,302,302,302,302,116,116,116,303,116,116,116,116,304,
+301,301,303,303,303,116,303,116,301,301,301,301,301,301,301,304,
+116,116,116,116,116,116,305,305,305,305,305,305,305,305,305,305,
+116,116,301,301,306,116,116,116,116,116,116,116,116,116,116,116,
/* block 28 */
-119,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,324,323,325,324,324,324,324,324,324,324,119,119,119,119, 6,
-323,323,323,323,323,323,326,324,324,324,324,324,324,324,324,327,
-328,328,328,328,328,328,328,328,328,328,327,327,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
+307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
+307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
+307,308,307,309,308,308,308,308,308,308,308,116,116,116,116, 5,
+307,307,307,307,307,307,310,308,308,308,308,308,308,308,308,311,
+312,312,312,312,312,312,312,312,312,312,311,311,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 29 */
-119,329,329,119,329,119,119,329,329,119,329,119,119,329,119,119,
-119,119,119,119,329,329,329,329,119,329,329,329,329,329,329,329,
-119,329,329,329,119,329,119,329,119,119,329,329,119,329,329,329,
-329,330,329,331,330,330,330,330,330,330,119,330,330,329,119,119,
-329,329,329,329,329,119,332,119,330,330,330,330,330,330,119,119,
-333,333,333,333,333,333,333,333,333,333,119,119,329,329,329,329,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,313,313,116,313,116,116,313,313,116,313,116,116,313,116,116,
+116,116,116,116,313,313,313,313,116,313,313,313,313,313,313,313,
+116,313,313,313,116,313,116,313,116,116,313,313,116,313,313,313,
+313,314,313,315,314,314,314,314,314,314,116,314,314,313,116,116,
+313,313,313,313,313,116,316,116,314,314,314,314,314,314,116,116,
+317,317,317,317,317,317,317,317,317,317,116,116,313,313,313,313,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 30 */
-334,335,335,335,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,335,336,335,335,335,337,337,335,335,335,335,335,335,
-338,338,338,338,338,338,338,338,338,338,339,339,339,339,339,339,
-339,339,339,339,335,337,335,337,335,337,340,341,340,341,342,342,
-334,334,334,334,334,334,334,334,119,334,334,334,334,334,334,334,
-334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
-334,334,334,334,334,334,334,334,334,334,334,334,334,119,119,119,
-119,337,337,337,337,337,337,337,337,337,337,337,337,337,337,342,
+318,319,319,319,320,320,320,320,320,320,320,320,320,320,320,320,
+320,320,320,319,320,319,319,319,321,321,319,319,319,319,319,319,
+322,322,322,322,322,322,322,322,322,322,323,323,323,323,323,323,
+323,323,323,323,319,321,319,321,319,321,324,325,324,325,326,326,
+318,318,318,318,318,318,318,318,116,318,318,318,318,318,318,318,
+318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
+318,318,318,318,318,318,318,318,318,318,318,318,318,116,116,116,
+116,321,321,321,321,321,321,321,321,321,321,321,321,321,321,326,
/* block 31 */
-337,337,337,337,337,336,337,337,334,334,334,334,334,337,337,337,
-337,337,337,337,337,337,337,337,119,337,337,337,337,337,337,337,
-337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
-337,337,337,337,337,337,337,337,337,337,337,337,337,119,335,335,
-335,335,335,335,335,335,337,335,335,335,335,335,335,119,335,335,
-336,336,336,336,336, 20, 20, 20, 20,336,336,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+321,321,321,321,321,320,321,321,318,318,318,318,318,321,321,321,
+321,321,321,321,321,321,321,321,116,321,321,321,321,321,321,321,
+321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,
+321,321,321,321,321,321,321,321,321,321,321,321,321,116,319,319,
+319,319,319,319,319,319,321,319,319,319,319,319,319,116,319,319,
+320,320,320,320,320, 19, 19, 19, 19,320,320,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 32 */
-343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,
-343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,
-343,343,343,343,343,343,343,343,343,343,343,344,344,345,345,345,
-345,346,345,345,345,345,345,345,344,345,345,346,346,345,345,343,
-347,347,347,347,347,347,347,347,347,347,348,348,348,348,348,348,
-343,343,343,343,343,343,346,346,345,345,343,343,343,343,345,345,
-345,343,344,344,344,343,343,344,344,344,344,344,344,344,343,343,
-343,345,345,345,345,343,343,343,343,343,343,343,343,343,343,343,
+327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+327,327,327,327,327,327,327,327,327,327,327,328,328,329,329,329,
+329,330,329,329,329,329,329,329,328,329,329,330,330,329,329,327,
+331,331,331,331,331,331,331,331,331,331,332,332,332,332,332,332,
+327,327,327,327,327,327,330,330,329,329,327,327,327,327,329,329,
+329,327,328,328,328,327,327,328,328,328,328,328,328,328,327,327,
+327,329,329,329,329,327,327,327,327,327,327,327,327,327,327,327,
/* block 33 */
-343,343,345,344,346,345,345,344,344,344,344,344,344,345,343,344,
-349,349,349,349,349,349,349,349,349,349,344,344,344,345,350,350,
-351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
-351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
-351,351,351,351,351,351,119,351,119,119,119,119,119,351,119,119,
-352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,
-352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,
-352,352,352,352,352,352,352,352,352,352,352,353,354,352,352,352,
+327,327,329,328,330,329,329,328,328,328,328,328,328,329,327,328,
+331,331,331,331,331,331,331,331,331,331,328,328,328,329,333,333,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+334,334,334,334,334,334,116,334,116,116,116,116,116,334,116,116,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335, 4,336,335,335,335,
/* block 34 */
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
/* block 35 */
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
/* block 36 */
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,119,358,358,358,358,119,119,
-358,358,358,358,358,358,358,119,358,119,358,358,358,358,119,119,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,116,340,340,340,340,116,116,
+340,340,340,340,340,340,340,116,340,116,340,340,340,340,116,116,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
/* block 37 */
-358,358,358,358,358,358,358,358,358,119,358,358,358,358,119,119,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,119,358,358,358,358,119,119,358,358,358,358,358,358,358,119,
-358,119,358,358,358,358,119,119,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,119,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
+340,340,340,340,340,340,340,340,340,116,340,340,340,340,116,116,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,116,340,340,340,340,116,116,340,340,340,340,340,340,340,116,
+340,116,340,340,340,340,116,116,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
/* block 38 */
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,119,358,358,358,358,119,119,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,119,119,359,359,359,
-360,360,360,360,360,360,360,360,360,361,361,361,361,361,361,361,
-361,361,361,361,361,361,361,361,361,361,361,361,361,119,119,119,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,116,340,340,340,340,116,116,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,116,116,341,341,341,
+342,342,342,342,342,342,342,342,342,343,343,343,343,343,343,343,
+343,343,343,343,343,343,343,343,343,343,343,343,343,116,116,116,
/* block 39 */
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-362,362,362,362,362,362,362,362,362,362,119,119,119,119,119,119,
-363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,
-363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,
-363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,
-363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,
-363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,
-364,364,364,364,364,364,119,119,365,365,365,365,365,365,119,119,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+344,344,344,344,344,344,344,344,344,344,116,116,116,116,116,116,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+346,346,346,346,346,346,116,116,347,347,347,347,347,347,116,116,
/* block 40 */
-366,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
+348,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
/* block 41 */
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
/* block 42 */
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,368,368,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,350,350,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
/* block 43 */
-369,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,
-370,370,370,370,370,370,370,370,370,370,370,371,372,119,119,119,
-373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,
-373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,
-373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,
-373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,
-373,373,373,373,373,373,373,373,373,373,373, 5, 5, 5,374,374,
-374,373,373,373,373,373,373,373,373,119,119,119,119,119,119,119,
+351,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,
+352,352,352,352,352,352,352,352,352,352,352,353,354,116,116,116,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355, 4, 4, 4,356,356,
+356,355,355,355,355,355,355,355,355,116,116,116,116,116,116,116,
/* block 44 */
-375,375,375,375,375,375,375,375,375,375,375,375,375,119,375,375,
-375,375,376,376,376,119,119,119,119,119,119,119,119,119,119,119,
-377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,
-377,377,378,378,378,379,379,119,119,119,119,119,119,119,119,119,
-380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,
-380,380,381,381,119,119,119,119,119,119,119,119,119,119,119,119,
-382,382,382,382,382,382,382,382,382,382,382,382,382,119,382,382,
-382,119,383,383,119,119,119,119,119,119,119,119,119,119,119,119,
+357,357,357,357,357,357,357,357,357,357,357,357,357,116,357,357,
+357,357,358,358,358,116,116,116,116,116,116,116,116,116,116,116,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,360,360,360, 4, 4,116,116,116,116,116,116,116,116,116,
+361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
+361,361,362,362,116,116,116,116,116,116,116,116,116,116,116,116,
+363,363,363,363,363,363,363,363,363,363,363,363,363,116,363,363,
+363,116,364,364,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 45 */
-384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,
-384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,
-384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,
-384,384,384,384,385,385,386,385,385,385,385,385,385,385,386,386,
-386,386,386,386,386,386,385,386,386,385,385,385,385,385,385,385,
-385,385,385,385,387,387,387,388,387,387,387,389,384,385,119,119,
-390,390,390,390,390,390,390,390,390,390,119,119,119,119,119,119,
-391,391,391,391,391,391,391,391,391,391,119,119,119,119,119,119,
+365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
+365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
+365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
+365,365,365,365,366,366,367,366,366,366,366,366,366,366,367,367,
+367,367,367,367,367,367,366,367,367,366,366,366,366,366,366,366,
+366,366,366,366,368,368,368,369,368,368,368,370,365,366,116,116,
+371,371,371,371,371,371,371,371,371,371,116,116,116,116,116,116,
+372,372,372,372,372,372,372,372,372,372,116,116,116,116,116,116,
/* block 46 */
-392,392,393,393,392,393,394,392,392,392,392,395,395,395,396,119,
-397,397,397,397,397,397,397,397,397,397,119,119,119,119,119,119,
-398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,
-398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,
-398,398,398,399,398,398,398,398,398,398,398,398,398,398,398,398,
-398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,
-398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,
-398,398,398,398,398,398,398,398,398,119,119,119,119,119,119,119,
+373,373, 4, 4,373, 4,374,373,373,373,373,375,375,375,376,116,
+377,377,377,377,377,377,377,377,377,377,116,116,116,116,116,116,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,379,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,116,116,116,116,116,116,116,
/* block 47 */
-398,398,398,398,398,395,395,398,398,398,398,398,398,398,398,398,
-398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,
-398,398,398,398,398,398,398,398,398,395,398,119,119,119,119,119,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,
-367,367,367,367,367,367,119,119,119,119,119,119,119,119,119,119,
+378,378,378,378,378,375,375,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,375,378,116,116,116,116,116,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,116,116,116,116,116,116,116,116,116,116,
/* block 48 */
-400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,
-400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,119,
-401,401,401,402,402,402,402,401,401,402,402,402,119,119,119,119,
-402,402,401,402,402,402,402,402,402,401,401,401,119,119,119,119,
-403,119,119,119,404,404,405,405,405,405,405,405,405,405,405,405,
-406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,
-406,406,406,406,406,406,406,406,406,406,406,406,406,406,119,119,
-406,406,406,406,406,119,119,119,119,119,119,119,119,119,119,119,
+380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,
+380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,116,
+381,381,381,382,382,382,382,381,381,382,382,382,116,116,116,116,
+382,382,381,382,382,382,382,382,382,381,381,381,116,116,116,116,
+383,116,116,116,384,384,385,385,385,385,385,385,385,385,385,385,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,116,116,
+386,386,386,386,386,116,116,116,116,116,116,116,116,116,116,116,
/* block 49 */
-407,407,407,407,407,407,407,407,407,407,407,407,407,407,407,407,
-407,407,407,407,407,407,407,407,407,407,407,407,407,407,407,407,
-407,407,407,407,407,407,407,407,407,407,407,407,119,119,119,119,
-407,407,407,407,407,407,407,407,407,407,407,407,407,407,407,407,
-407,407,407,407,407,407,407,407,407,407,119,119,119,119,119,119,
-408,408,408,408,408,408,408,408,408,408,409,119,119,119,410,410,
-411,411,411,411,411,411,411,411,411,411,411,411,411,411,411,411,
-411,411,411,411,411,411,411,411,411,411,411,411,411,411,411,411,
+387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
+387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
+387,387,387,387,387,387,387,387,387,387,387,387,116,116,116,116,
+387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
+387,387,387,387,387,387,387,387,387,387,116,116,116,116,116,116,
+388,388,388,388,388,388,388,388,388,388,389,116,116,116,390,390,
+391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,
+391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,
/* block 50 */
-412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,
-412,412,412,412,412,412,412,413,413,414,414,413,119,119,415,415,
-416,416,416,416,416,416,416,416,416,416,416,416,416,416,416,416,
-416,416,416,416,416,416,416,416,416,416,416,416,416,416,416,416,
-416,416,416,416,416,416,416,416,416,416,416,416,416,416,416,416,
-416,416,416,416,416,417,418,417,418,418,418,418,418,418,418,119,
-418,419,418,419,419,418,418,418,418,418,418,418,418,417,417,417,
-417,417,417,418,418,418,418,418,418,418,418,418,418,119,119,418,
+392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
+392,392,392,392,392,392,392,393,393,394,394,393,116,116,395,395,
+396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
+396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
+396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
+396,396,396,396,396,397,398,397,398,398,398,398,398,398,398,116,
+398,399,398,399,399,398,398,398,398,398,398,398,398,397,397,397,
+397,397,397,398,398,398,398,398,398,398,398,398,398,116,116,398,
/* block 51 */
-420,420,420,420,420,420,420,420,420,420,119,119,119,119,119,119,
-420,420,420,420,420,420,420,420,420,420,119,119,119,119,119,119,
-421,421,421,421,421,421,421,422,421,421,421,421,421,421,119,119,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,423,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+400,400,400,400,400,400,400,400,400,400,116,116,116,116,116,116,
+400,400,400,400,400,400,400,400,400,400,116,116,116,116,116,116,
+401,401,401,401,401,401,401,402,401,401,401,401,401,401,116,116,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,403,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 52 */
-424,424,424,424,425,426,426,426,426,426,426,426,426,426,426,426,
-426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,
-426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,
-426,426,426,426,424,425,424,424,424,424,424,425,424,425,425,425,
-425,425,424,425,425,426,426,426,426,426,426,426,119,119,119,119,
-427,427,427,427,427,427,427,427,427,427,428,428,428,428,428,428,
-428,429,429,429,429,429,429,429,429,429,429,424,424,424,424,424,
-424,424,424,424,429,429,429,429,429,429,429,429,429,119,119,119,
+404,404,404,404,405,406,406,406,406,406,406,406,406,406,406,406,
+406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,
+406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,
+406,406,406,406,404,405,404,404,404,404,404,405,404,405,405,405,
+405,405,404,405,405,406,406,406,406,406,406,406,116,116,116,116,
+407,407,407,407,407,407,407,407,407,407,408,408,408,408,408,408,
+408,409,409,409,409,409,409,409,409,409,409,404,404,404,404,404,
+404,404,404,404,409,409,409,409,409,409,409,409,409,116,116,116,
/* block 53 */
-430,430,431,432,432,432,432,432,432,432,432,432,432,432,432,432,
-432,432,432,432,432,432,432,432,432,432,432,432,432,432,432,432,
-432,431,430,430,430,430,431,431,430,430,431,430,430,430,432,432,
-433,433,433,433,433,433,433,433,433,433,432,432,432,432,432,432,
-434,434,434,434,434,434,434,434,434,434,434,434,434,434,434,434,
-434,434,434,434,434,434,434,434,434,434,434,434,434,434,434,434,
-434,434,434,434,434,434,435,436,435,435,436,436,436,435,436,435,
-435,435,436,436,119,119,119,119,119,119,119,119,437,437,437,437,
+410,410,411,412,412,412,412,412,412,412,412,412,412,412,412,412,
+412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,
+412,411,410,410,410,410,411,411,410,410,411,410,410,410,412,412,
+413,413,413,413,413,413,413,413,413,413,412,412,412,412,412,412,
+414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
+414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
+414,414,414,414,414,414,415,416,415,415,416,416,416,415,416,415,
+415,415,416,416,116,116,116,116,116,116,116,116,417,417,417,417,
/* block 54 */
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,439,439,439,439,439,439,439,439,440,440,440,440,
-440,440,440,440,439,439,440,440,119,119,119,441,441,441,441,441,
-442,442,442,442,442,442,442,442,442,442,119,119,119,438,438,438,
-443,443,443,443,443,443,443,443,443,443,444,444,444,444,444,444,
-444,444,444,444,444,444,444,444,444,444,444,444,444,444,444,444,
-444,444,444,444,444,444,444,444,445,445,445,445,445,445,446,446,
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,419,419,419,419,419,419,419,419,420,420,420,420,
+420,420,420,420,419,419,420,420,116,116,116,421,421,421,421,421,
+422,422,422,422,422,422,422,422,422,422,116,116,116,418,418,418,
+423,423,423,423,423,423,423,423,423,423,424,424,424,424,424,424,
+424,424,424,424,424,424,424,424,424,424,424,424,424,424,424,424,
+424,424,424,424,424,424,424,424,425,425,425,425,425,425,426,426,
/* block 55 */
-447,448,449,450,451,452,453,454,455,119,119,119,119,119,119,119,
-456,456,456,456,456,456,456,456,456,456,456,456,456,456,456,456,
-456,456,456,456,456,456,456,456,456,456,456,456,456,456,456,456,
-456,456,456,456,456,456,456,456,456,456,456,119,119,456,456,456,
-457,457,457,457,457,457,457,457,119,119,119,119,119,119,119,119,
-458,459,458,460,459,461,461,462,461,462,463,459,462,462,459,459,
-462,464,459,459,459,459,459,459,459,465,466,465,465,461,465,465,
-465,465,467,467,468,466,466,469,470,470,119,119,119,119,119,119,
+427,428,429,430,431,432,433,434,435,116,116,116,116,116,116,116,
+436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,
+436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,
+436,436,436,436,436,436,436,436,436,436,436,116,116,436,436,436,
+437,437,437,437,437,437,437,437,116,116,116,116,116,116,116,116,
+111,111,111, 4,111,111,111,111,111,111,111,111,111,111,111,111,
+111,438,111,111,111,111,111,111,111,439,439,439,439,111,439,439,
+439,439,438,438,111,439,439,438,111,111,116,116,116,116,116,116,
/* block 56 */
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35,127,127,127,127,127,471,109,109,109,109,
-109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-109,109,109,109,109,109,109,109,109,109,109,109,109,120,120,120,
-120,120,109,109,109,109,120,120,120,120,120, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35,472,473, 35, 35, 35,474, 35, 35,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34,124,124,124,124,124,440,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,117,117,117,
+117,117,108,108,108,108,117,117,117,117,117, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34,441,442, 34, 34, 34,443, 34, 34,
/* block 57 */
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,109,109,109,109,109,
-109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,120,
-113,113,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,119,112,112,112,112,112,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,117,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,116,111,111,111,111,111,
/* block 58 */
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
-475,476, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+444,445, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
/* block 59 */
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 35, 35, 35, 35, 35,477, 35, 35,478, 35,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 34, 34, 34, 34, 34,446, 34, 34,447, 34,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
/* block 60 */
-479,479,479,479,479,479,479,479,480,480,480,480,480,480,480,480,
-479,479,479,479,479,479,119,119,480,480,480,480,480,480,119,119,
-479,479,479,479,479,479,479,479,480,480,480,480,480,480,480,480,
-479,479,479,479,479,479,479,479,480,480,480,480,480,480,480,480,
-479,479,479,479,479,479,119,119,480,480,480,480,480,480,119,119,
-127,479,127,479,127,479,127,479,119,480,119,480,119,480,119,480,
-479,479,479,479,479,479,479,479,480,480,480,480,480,480,480,480,
-481,481,482,482,482,482,483,483,484,484,485,485,486,486,119,119,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+448,448,448,448,448,448,116,116,449,449,449,449,449,449,116,116,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+448,448,448,448,448,448,116,116,449,449,449,449,449,449,116,116,
+124,448,124,448,124,448,124,448,116,449,116,449,116,449,116,449,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+450,450,451,451,451,451,452,452,453,453,454,454,455,455,116,116,
/* block 61 */
-479,479,479,479,479,479,479,479,487,487,487,487,487,487,487,487,
-479,479,479,479,479,479,479,479,487,487,487,487,487,487,487,487,
-479,479,479,479,479,479,479,479,487,487,487,487,487,487,487,487,
-479,479,127,488,127,119,127,127,480,480,489,489,490,118,491,118,
-118,118,127,488,127,119,127,127,492,492,492,492,490,118,118,118,
-479,479,127,127,119,119,127,127,480,480,493,493,119,118,118,118,
-479,479,127,127,127,168,127,127,480,480,494,494,173,118,118,118,
-119,119,127,488,127,119,127,127,495,495,496,496,490,118,118,119,
+448,448,448,448,448,448,448,448,456,456,456,456,456,456,456,456,
+448,448,448,448,448,448,448,448,456,456,456,456,456,456,456,456,
+448,448,448,448,448,448,448,448,456,456,456,456,456,456,456,456,
+448,448,124,457,124,116,124,124,449,449,458,458,459,115,460,115,
+115,115,124,457,124,116,124,124,461,461,461,461,459,115,115,115,
+448,448,124,124,116,116,124,124,449,449,462,462,116,115,115,115,
+448,448,124,124,124,165,124,124,449,449,463,463,170,115,115,115,
+116,116,124,457,124,116,124,124,464,464,465,465,459,115,115,116,
/* block 62 */
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24,497,498, 24, 24,
- 10, 10, 10, 10, 10, 10, 5, 5, 23, 27, 7, 23, 23, 27, 7, 23,
- 5, 5, 5, 5, 5, 5, 5, 5,499,500, 24, 24, 24, 24, 24, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 23, 27, 5,501, 5, 5, 16,
- 16, 5, 5, 5, 9, 7, 8, 5, 5,501, 5, 5, 5, 5, 5, 5,
- 5, 5, 9, 5, 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
- 24, 24, 24, 24, 24,502, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 25,109,119,119, 25, 25, 25, 25, 25, 25, 9, 9, 9, 7, 8,109,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 23,466,467, 23, 23,
+ 9, 9, 9, 9, 9, 9, 4, 4, 22, 26, 6, 22, 22, 26, 6, 22,
+ 4, 4, 4, 4, 4, 4, 4, 4,468,469, 23, 23, 23, 23, 23, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 22, 26, 4,470, 4, 4, 15,
+ 15, 4, 4, 4, 8, 6, 7, 4, 4,470, 4, 4, 4, 4, 4, 4,
+ 4, 4, 8, 4, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
+ 23, 23, 23, 23, 23,471, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 24,108,116,116, 24, 24, 24, 24, 24, 24, 8, 8, 8, 6, 7,108,
/* block 63 */
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 9, 9, 9, 7, 8,119,
-109,109,109,109,109,109,109,109,109,109,109,109,109,119,119,119,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-112,112,112,112,112,112,112,112,112,112,112,112,112,423,423,423,
-423,112,423,423,423,112,112,112,112,112,112,112,112,112,112,112,
-503,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 8, 8, 8, 6, 7,116,
+108,108,108,108,108,108,108,108,108,108,108,108,108,116,116,116,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+111,111,111,111,111,111,111,111,111,111,111,111,111,403,403,403,
+403,111,403,403,403,111,111,111,111,111,111,111,111,111,111,111,
+111,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 64 */
- 20, 20,504, 20, 20, 20, 20,504, 20, 20,505,504,504,504,505,505,
-504,504,504,505, 20,504, 20, 20, 9,504,504,504,504,504, 20, 20,
- 20, 20, 21, 20,504, 20,506, 20,504, 20,507,508,504,504, 20,505,
-504,504,509,504,505,510,510,510,510,511, 20, 20,505,505,504,504,
- 9, 9, 9, 9, 9,504,505,505,505,505, 20, 9, 20, 20,512, 20,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+ 19, 19,472, 19, 19, 19, 19,472, 19, 19,473,472,472,472,473,473,
+472,472,472,473, 19,472, 19, 19, 8,472,472,472,472,472, 19, 19,
+ 19, 19, 20, 19,472, 19,474, 19,472, 19,475,476,472,472, 19,473,
+472,472,477,472,473,439,439,439,439,478, 19, 19,473,473,472,472,
+ 8, 8, 8, 8, 8,472,473,473,473,473, 19, 8, 19, 19,479, 19,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
/* block 65 */
-515,515,515, 32, 33,515,515,515,515, 25, 20, 20,119,119,119,119,
- 9, 9, 9, 9,516, 21, 21, 21, 21, 21, 9, 9, 20, 20, 20, 20,
- 9, 20, 20, 9, 20, 20, 9, 20, 20, 21, 21, 20, 20, 20, 9, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 9, 9,
- 20, 20, 9, 20, 9, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+482,482,482, 31, 32,482,482,482,482, 24, 19, 19,116,116,116,116,
+ 8, 8, 8, 8,483, 20, 20, 20, 20, 20, 8, 8, 19, 19, 19, 19,
+ 8, 19, 19, 8, 19, 19, 8, 19, 19, 20, 20, 19, 19, 19, 8, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8,
+ 19, 19, 8, 19, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* block 66 */
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* block 67 */
- 20, 20, 20, 20, 20, 20, 20, 20, 7, 8, 7, 8, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 20, 20, 20, 20,
- 9, 9, 20, 20, 20, 20, 20, 20, 21, 7, 8, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 9, 20, 20, 20,
+ 19, 19, 19, 19, 19, 19, 19, 19, 6, 7, 6, 7, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 19, 19, 19, 19,
+ 8, 8, 19, 19, 19, 19, 19, 19, 20, 6, 7, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 19, 19, 19,
/* block 68 */
- 20, 20, 20, 20, 20, 20, 20, 20, 21, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 9, 9, 9, 9,
- 9, 9, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, 21, 20, 20, 20, 20, 20,
+ 19, 19, 19, 19, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8,
+ 8, 8, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 19, 19, 19, 19, 20, 20, 20, 19, 19, 19, 19, 19,
/* block 69 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 70 */
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20,517,517,517,517,517,517,517,517,517,517,
-517,517,518,517,517,517,517,517,517,517,517,517,517,517,517,517,
-519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
-519,519,519,519,519,519,519,519,519,519, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19,484,484,484,484,484,484,484,484,484,484,
+484,484,485,484,484,484,484,484,484,484,484,484,484,484,484,484,
+486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
+486,486,486,486,486,486,486,486,486,486, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 71 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+
+/* block 72 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 20, 8, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8,483,483,483,483, 8,
+
+/* block 73 */
+ 20, 20, 20, 20, 20, 20, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,483,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+
+/* block 74 */
+ 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-
-/* block 72 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 21, 9, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 9, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 9, 9, 9,516,516,516,516, 9,
-
-/* block 73 */
- 21, 21, 21, 21, 21, 21, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,516,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-
-/* block 74 */
- 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
/* block 75 */
- 21, 21, 21, 21, 21, 21, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 20, 21, 20, 21, 20, 20, 20, 20, 20, 20, 21, 20, 20,
- 20, 21, 20, 20, 20, 20, 20, 20, 21, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 21, 20, 20, 21, 20, 20, 20, 20, 21, 20, 21, 20,
- 20, 20, 20, 21, 21, 21, 20, 21, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 21, 21, 21, 21, 21, 7, 8, 7, 8, 7, 8, 7, 8,
- 7, 8, 7, 8, 7, 8, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 20, 20, 20, 20, 20, 20, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 19, 20, 19, 20, 19, 19, 19, 19, 19, 19, 20, 19, 19,
+ 19, 20, 19, 19, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 20, 19, 19, 20, 19, 19, 19, 19, 20, 19, 20, 19,
+ 19, 19, 19, 20, 20, 20, 19, 20, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 20, 20, 20, 20, 20, 6, 7, 6, 7, 6, 7, 6, 7,
+ 6, 7, 6, 7, 6, 7, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 76 */
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 20, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
- 9, 9, 9, 9, 9, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 7, 8, 7, 8, 7, 8, 7, 8, 7, 8,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 19, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
+ 8, 8, 8, 8, 8, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* block 77 */
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
/* block 78 */
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9,516,516, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8,483,483, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* block 79 */
- 9, 9, 9, 7, 8, 7, 8, 7, 8, 7, 8, 7, 8, 7, 8, 7,
- 8, 7, 8, 7, 8, 7, 8, 7, 8, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 7, 8, 7, 8, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 8, 9, 9,
+ 8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
+ 7, 6, 7, 6, 7, 6, 7, 6, 7, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 6, 7, 6, 7, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 7, 8, 8,
/* block 80 */
- 20, 20, 20, 20, 20, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 20, 20, 9, 9, 9, 9, 9, 9, 20, 20, 20,
- 21, 20, 20, 20, 20, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20,119,119, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 19, 19, 19, 19, 19, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 19, 19, 8, 8, 8, 8, 8, 8, 19, 19, 19,
+ 20, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,116,116, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 81 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20,119,119, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20,119, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,119,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19,116,116, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,116, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,
/* block 82 */
-521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,
-521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,
-521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,119,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,119,
- 32, 33,523,524,525,526,527, 32, 33, 32, 33, 32, 33,528,529,530,
-531, 35, 32, 33, 35, 32, 33, 35, 35, 35, 35, 35,109,109,532,532,
+488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
+488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
+488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,116,
+489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,
+489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,
+489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,116,
+ 31, 32,490,491,492,493,494, 31, 32, 31, 32, 31, 32,495,496,497,
+498, 34, 31, 32, 34, 31, 32, 34, 34, 34, 34, 34,108,108,499,499,
/* block 83 */
-164,165,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-164,165,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-164,165,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-164,165,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-164,165,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-164,165,164,165,164,165,164,165,164,165,164,165,164,165,164,165,
-164,165,164,165,533,534,534,534,534,534,534,164,165,164,165,535,
-535,535,164,165,119,119,119,119,119,536,536,536,536,537,536,536,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,500,501,501,501,501,501,501,161,162,161,162,502,
+502,502,161,162,116,116,116,116,116,503,503,503,503,504,503,503,
/* block 84 */
-538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
-538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
-538,538,538,538,538,538,119,538,119,119,119,119,119,538,119,119,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,119,119,119,119,119,119,119,540,
-541,119,119,119,119,119,119,119,119,119,119,119,119,119,119,542,
+505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+505,505,505,505,505,505,116,505,116,116,116,116,116,505,116,116,
+506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,
+506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,
+506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,
+506,506,506,506,506,506,506,506,116,116,116,116,116,116,116,507,
+508,116,116,116,116,116,116,116,116,116,116,116,116,116,116,509,
/* block 85 */
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,119,119,119,119,119,119,119,119,119,
-358,358,358,358,358,358,358,119,358,358,358,358,358,358,358,119,
-358,358,358,358,358,358,358,119,358,358,358,358,358,358,358,119,
-358,358,358,358,358,358,358,119,358,358,358,358,358,358,358,119,
-358,358,358,358,358,358,358,119,358,358,358,358,358,358,358,119,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,116,116,116,116,116,116,116,116,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,
+193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,
/* block 86 */
- 5, 5, 23, 27, 23, 27, 5, 5, 5, 23, 27, 5, 23, 27, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 10, 5, 5, 10, 5, 23, 27, 5, 5,
- 23, 27, 7, 8, 7, 8, 7, 8, 7, 8, 5, 5, 5, 5, 5,110,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 5, 5, 5, 5,
- 10, 5, 7,544, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+ 4, 4, 22, 26, 22, 26, 4, 4, 4, 22, 26, 4, 22, 26, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 9, 4, 22, 26, 4, 4,
+ 22, 26, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 4, 4, 4,109,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 4, 4, 4, 4,
+ 9, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 87 */
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,119,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,119,119,119,119,119,119,119,119,119,119,119,119,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,116,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 88 */
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
/* block 89 */
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
-545,545,545,545,545,545,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,119,119,119,119,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,
/* block 90 */
- 4,546,546,547, 20,548,549,550,551,552,551,552,551,552,551,552,
-551,552, 20,553,551,552,551,552,551,552,551,552,554,555,556,556,
- 20,550,550,550,550,550,550,550,550,550,557,557,557,557,558,558,
-559,560,560,560,560,560, 20,553,550,550,550,548,561,562,563,563,
-119,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
+ 3, 4, 4, 4, 19,511,439,512, 6, 7, 6, 7, 6, 7, 6, 7,
+ 6, 7, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7, 9, 6, 7, 7,
+ 19,512,512,512,512,512,512,512,512,512,111,111,111,111,513,513,
+514,109,109,109,109,109, 19, 19,512,512,512,511,439,470, 19, 19,
+116,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 91 */
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,119,119,565,565,566,566,567,567,564,
-568,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,546,560,570,570,569,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,116,116,111,111, 14, 14,516,516,515,
+ 9,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517, 4,109,518,518,517,
/* block 92 */
-119,119,119,119,119,571,571,571,571,571,571,571,571,571,571,571,
-571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
-571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
-119,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
+116,116,116,116,116,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+116,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
/* block 93 */
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,119,
-563,563,573,573,573,573,563,563,563,563,563,563,563,563,563,563,
-571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
-571,571,571,571,571,571,571,571,571,571,571,119,119,119,119,119,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563,563,563,563,119,119,119,119,119,119,119,119,119,119,119,119,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,116,
+ 19, 19, 24, 24, 24, 24, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,116,116,116,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
/* block 94 */
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,119,
-573,573,573,573,573,573,573,573,573,573,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563, 25, 25, 25, 25, 25, 25, 25, 25,
- 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574, 20,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 24, 24, 24, 24, 24, 24, 24, 24,
+ 19, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521, 19,
/* block 95 */
-573,573,573,573,573,573,573,573,573,573,563,563,563,563,563,563,
-563,563,563,563,563,563,563,575,563,575,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-563,563,563,563,563,563,563,563,563,563,563,563, 20, 20, 20, 20,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,119,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 20, 19, 20, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,116,
/* block 96 */
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,
-576,576,576,576,576,576,576,576,563,563,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,563,563,563,563,563,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 97 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563, 20,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
/* block 98 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 99 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,119,119,119,119,119,119,119,119,119,119,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 100 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,525,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
/* block 101 */
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,579,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
/* block 102 */
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
+524,524,524,524,524,524,524,524,524,524,524,524,524,116,116,116,
+526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
+526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
+526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
+526,526,526,526,526,526,526,116,116,116,116,116,116,116,116,116,
+527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,
+527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,
+527,527,527,527,527,527,527,527,528,528,528,528,528,528,529,529,
/* block 103 */
-578,578,578,578,578,578,578,578,578,578,578,578,578,119,119,119,
-580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-580,580,580,580,580,580,580,119,119,119,119,119,119,119,119,119,
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
-581,581,581,581,581,581,581,581,582,582,582,582,582,582,583,583,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
/* block 104 */
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+530,530,530,530,530,530,530,530,530,530,530,530,531,532,532,532,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+533,533,533,533,533,533,533,533,533,533,530,530,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+188,189,188,189,188,189,188,189,188,189,534,535,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,536,193,
+194,194,194,537,193,193,193,193,193,193,193,193,193,193,537,441,
/* block 105 */
-584,584,584,584,584,584,584,584,584,584,584,584,585,586,586,586,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-587,587,587,587,587,587,587,587,587,587,584,584,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-191,192,191,192,191,192,191,192,191,192,588,589,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,590,197,
-199,199,199,591,543,543,543,543,543,543,543,543,543,543,591,472,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,441,441,193,193,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,539,539,539,539,539,539,539,539,539,539,
+540,540,541,541,541,541,541,541,116,116,116,116,116,116,116,116,
/* block 106 */
-191,192,191,192,191,192,191,192,191,192,191,192,191,192,191,192,
-191,192,191,192,191,192,191,192,191,192,191,192,472,472,543,543,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,593,593,593,593,593,593,593,593,593,593,
-594,594,595,595,595,595,595,595,119,119,119,119,119,119,119,119,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14,109,109,109,109,109,109,109,109,109,
+ 14, 14, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 34, 34, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+108, 34, 34, 34, 34, 34, 34, 34, 34, 31, 32, 31, 32,542, 31, 32,
/* block 107 */
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15,110,110,110,110,110,110,110,110,110,
- 15, 15, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 35, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
-109, 35, 35, 35, 35, 35, 35, 35, 35, 32, 33, 32, 33,596, 32, 33,
+ 31, 32, 31, 32, 31, 32, 31, 32,109, 14, 14, 31, 32,543, 34, 21,
+ 31, 32, 31, 32, 34, 34, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,544,545,546,547,544, 34,
+548,549,550,551, 31, 32, 31, 32, 31, 32,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116, 21,108,108, 34, 21, 21, 21, 21, 21,
/* block 108 */
- 32, 33, 32, 33, 32, 33, 32, 33,110, 15, 15, 32, 33,597, 35, 22,
- 32, 33, 32, 33, 35, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,598,599,600,601,598, 35,
-602,603,604,605, 32, 33, 32, 33, 32, 33,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119, 22,109,109, 35, 22, 22, 22, 22, 22,
+552,552,553,552,552,552,553,552,552,552,552,553,552,552,552,552,
+552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
+552,552,552,554,554,553,553,554,555,555,555,555,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 19, 19, 5, 19,116,116,116,116,116,116,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,557,557,557,557,116,116,116,116,116,116,116,116,
/* block 109 */
-606,606,607,606,606,606,607,606,606,606,606,607,606,606,606,606,
-606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
-606,606,606,608,608,607,607,608,609,609,609,609,119,119,119,119,
-610,610,610,611,611,611,612,612,613,612,119,119,119,119,119,119,
-614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,
-614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,
-614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,
-614,614,614,614,615,615,615,615,119,119,119,119,119,119,119,119,
+558,558,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
+559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
+559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
+559,559,559,559,558,558,558,558,558,558,558,558,558,558,558,558,
+558,558,558,558,560,560,116,116,116,116,116,116,116,116,561,561,
+562,562,562,562,562,562,562,562,562,562,116,116,116,116,116,116,
+240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
+240,240,242,242,242,242,242,242,244,244,244,242,244,242,242,240,
/* block 110 */
-616,616,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,618,618,119,119,119,119,119,119,119,119,619,619,
-620,620,620,620,620,620,620,620,620,620,119,119,119,119,119,119,
-251,251,251,251,251,251,251,251,251,251,251,251,251,251,251,251,
-251,621,253,622,253,253,253,253,259,259,259,253,259,253,253,251,
+563,563,563,563,563,563,563,563,563,563,564,564,564,564,564,564,
+564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
+564,564,564,564,564,564,565,565,565,565,565,565,565,565, 4,566,
+567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,
+567,567,567,567,567,567,567,568,568,568,568,568,568,568,568,568,
+568,568,569,569,116,116,116,116,116,116,116,116,116,116,116,570,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,116,116,116,
/* block 111 */
-623,623,623,623,623,623,623,623,623,623,624,624,624,624,624,624,
-624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,
-624,624,624,624,624,624,625,625,625,625,625,625,625,625,626,627,
-628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
-628,628,628,628,628,628,628,629,629,629,629,629,629,629,629,629,
-629,629,630,630,119,119,119,119,119,119,119,119,119,119,119,631,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,355,355,355,355,355,355,355,355,355,355,355,119,119,119,
+571,571,571,572,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,571,572,572,571,571,571,571,572,572,571,572,572,572,
+572,574,574,574,574,574,574,574,574,574,574,574,574,574,116,109,
+575,575,575,575,575,575,575,575,575,575,116,116,116,116,574,574,
+327,327,327,327,327,329,576,327,327,327,327,327,327,327,327,327,
+331,331,331,331,331,331,331,331,331,331,327,327,327,327,327,116,
/* block 112 */
-632,632,632,633,634,634,634,634,634,634,634,634,634,634,634,634,
-634,634,634,634,634,634,634,634,634,634,634,634,634,634,634,634,
-634,634,634,634,634,634,634,634,634,634,634,634,634,634,634,634,
-634,634,634,632,633,633,632,632,632,632,633,633,632,633,633,633,
-633,635,635,635,635,635,635,635,635,635,635,635,635,635,119,636,
-637,637,637,637,637,637,637,637,637,637,119,119,119,119,635,635,
-343,343,343,343,343,345,638,343,343,343,343,343,343,343,343,343,
-349,349,349,349,349,349,349,349,349,349,343,343,343,343,343,119,
+577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+577,577,577,577,577,577,577,577,577,578,578,578,578,578,578,579,
+579,578,578,579,579,578,578,116,116,116,116,116,116,116,116,116,
+577,577,577,578,577,577,577,577,577,577,577,577,578,579,116,116,
+580,580,580,580,580,580,580,580,580,580,116,116,581,581,581,581,
+327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+576,327,327,327,327,327,327,333,333,333,327,328,329,328,327,327,
/* block 113 */
-639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
-639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
-639,639,639,639,639,639,639,639,639,640,640,640,640,640,640,641,
-641,640,640,641,641,640,640,119,119,119,119,119,119,119,119,119,
-639,639,639,640,639,639,639,639,639,639,639,639,640,641,119,119,
-642,642,642,642,642,642,642,642,642,642,119,119,643,643,643,643,
-343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,
-638,343,343,343,343,343,343,350,350,350,343,344,345,344,343,343,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+583,582,583,583,583,582,582,583,583,582,582,582,582,582,583,583,
+582,583,582,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,582,582,584,585,585,
+586,586,586,586,586,586,586,586,586,586,586,587,588,588,587,587,
+589,589,586,590,590,587,588,116,116,116,116,116,116,116,116,116,
/* block 114 */
-644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,
-644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,
-644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,
-645,644,645,645,645,644,644,645,645,644,644,644,644,644,645,645,
-644,645,644,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,644,644,646,647,647,
-648,648,648,648,648,648,648,648,648,648,648,649,650,650,649,649,
-651,651,648,652,652,649,650,119,119,119,119,119,119,119,119,119,
+116,340,340,340,340,340,340,116,116,340,340,340,340,340,340,116,
+116,340,340,340,340,340,340,116,116,116,116,116,116,116,116,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34,591, 34, 34, 34, 34, 34, 34, 34, 14,108,108,108,108,
+ 34, 34, 34, 34, 34,124,116,116,116,116,116,116,116,116,116,116,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
/* block 115 */
-119,358,358,358,358,358,358,119,119,358,358,358,358,358,358,119,
-119,358,358,358,358,358,358,119,119,119,119,119,119,119,119,119,
-358,358,358,358,358,358,358,119,358,358,358,358,358,358,358,119,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35,653, 35, 35, 35, 35, 35, 35, 35, 15,109,109,109,109,
- 35, 35, 35, 35, 35,127,119,119,119,119,119,119,119,119,119,119,
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,587,587,588,587,587,588,587,587,589,587,588,116,116,
+593,593,593,593,593,593,593,593,593,593,116,116,116,116,116,116,
/* block 116 */
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
-648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
-648,648,648,649,649,650,649,649,650,649,649,651,649,650,119,119,
-655,655,655,655,655,655,655,655,655,655,119,119,119,119,119,119,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 117 */
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
/* block 118 */
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 119 */
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
/* block 120 */
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 121 */
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
/* block 122 */
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 123 */
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-656,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,656,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,656,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,116,116,116,116,116,116,116,116,116,116,116,116,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,116,116,116,116,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,116,116,116,116,
/* block 124 */
-657,657,657,657,657,657,657,657,656,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,119,119,119,119,119,119,119,119,119,119,119,119,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,119,119,119,119,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,119,119,119,119,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
/* block 125 */
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
/* block 126 */
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
/* block 127 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,119,119,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 128 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+ 34, 34, 34, 34, 34, 34, 34,116,116,116,116,116,116,116,116,116,
+116,116,116,200,200,200,200,200,116,116,116,116,116,208,205,208,
+208,208,208,208,208,208,208,208,208,598,208,208,208,208,208,208,
+208,208,208,208,208,208,208,116,208,208,208,208,208,116,208,116,
+208,208,116,208,208,116,208,208,208,208,208,208,208,208,208,208,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 129 */
- 35, 35, 35, 35, 35, 35, 35,119,119,119,119,119,119,119,119,119,
-119,119,119,205,205,205,205,205,119,119,119,119,119,214,211,214,
-214,214,214,214,214,214,214,214,214,660,214,214,214,214,214,214,
-214,214,214,214,214,214,214,119,214,214,214,214,214,119,214,119,
-214,214,119,214,214,119,214,214,214,214,214,214,214,214,214,214,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
+599,599,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 130 */
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,661,661,661,661,661,661,661,661,661,661,661,661,661,661,
-661,661,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 131 */
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217, 7, 6,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 132 */
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224, 8, 7,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+116,116,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+217,217,217,217,217,217,217,217,217,217,217,217,213,214,116,116,
/* block 133 */
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-119,119,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-224,224,662,224,224,224,224,224,224,224,224,224,219,663,119,119,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 4, 4, 4, 4, 4, 4, 4, 6, 7, 4,116,116,116,116,116,116,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,193,193,
+ 4, 9, 9, 15, 15, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
+ 7, 6, 7, 6, 7, 4, 4, 6, 7, 4, 4, 4, 4, 15, 15, 15,
+ 4, 4, 4,116, 4, 4, 4, 4, 9, 6, 7, 6, 7, 6, 7, 4,
+ 4, 4, 8, 9, 8, 8, 8,116, 4, 5, 4, 4,116,116,116,116,
+217,217,217,217,217,116,217,217,217,217,217,217,217,217,217,217,
/* block 134 */
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
- 5, 5, 5, 5, 5, 5, 5, 7, 8, 5,119,119,119,119,119,119,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,543,543,
- 5, 10, 10, 16, 16, 7, 8, 7, 8, 7, 8, 7, 8, 7, 8, 7,
- 8, 7, 8, 7, 8,547,547, 7, 8, 5, 5, 5, 5, 16, 16, 16,
- 5, 5, 5,119, 5, 5, 5, 5, 10, 7, 8, 7, 8, 7, 8, 5,
- 5, 5, 9, 10, 9, 9, 9,119, 5, 6, 5, 5,119,119,119,119,
-224,224,224,224,224,119,224,224,224,224,224,224,224,224,224,224,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,116,116, 23,
/* block 135 */
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,119,119, 24,
+116, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 4, 4, 8, 8, 8, 4,
+ 4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 4, 7, 14, 15,
+ 14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 6, 8, 7, 8, 6,
+ 7, 4, 6, 7, 4, 4,517,517,517,517,517,517,517,517,517,517,
+109,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
/* block 136 */
-119, 5, 5, 5, 6, 5, 5, 5, 7, 8, 5, 9, 5, 10, 5, 5,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 9, 9, 9, 5,
- 5, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 7, 5, 8, 15, 16,
- 15, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 7, 9, 8, 9, 7,
- 8,546,551,552,546,546,569,569,569,569,569,569,569,569,569,569,
-560,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,600,600,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,116,
+116,116,520,520,520,520,520,520,116,116,520,520,520,520,520,520,
+116,116,520,520,520,520,520,520,116,116,520,520,520,116,116,116,
+ 5, 5, 8, 14, 19, 5, 5,116, 19, 8, 8, 8, 8, 19, 19,116,
+471,471,471,471,471,471,471,471,471, 23, 23, 23, 19, 19,116,116,
/* block 137 */
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,664,664,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,119,
-119,119,572,572,572,572,572,572,119,119,572,572,572,572,572,572,
-119,119,572,572,572,572,572,572,119,119,572,572,572,119,119,119,
- 6, 6, 9, 15, 20, 6, 6,119, 20, 9, 9, 9, 9, 20, 20,119,
-502,502,502,502,502,502,502,502,502, 24, 24, 24, 20, 20,119,119,
+601,601,601,601,601,601,601,601,601,601,601,601,116,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,116,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,116,601,601,116,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,116,116,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 138 */
-665,665,665,665,665,665,665,665,665,665,665,665,119,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,119,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,119,665,665,119,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,119,119,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,116,116,116,116,116,
/* block 139 */
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,119,119,119,119,119,
+ 4, 4, 4,116,116,116,116, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24,116,116,116, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,603,603,603,603,604,604,604,604,604,604,604,
/* block 140 */
-666,666,666,119,119,119,119,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,119,119,119,668,668,668,668,668,668,668,668,668,
-669,669,669,669,669,669,669,669,669,669,669,669,669,669,669,669,
-669,669,669,669,669,669,669,669,669,669,669,669,669,669,669,669,
-669,669,669,669,669,669,669,669,669,669,669,669,669,669,669,669,
-669,669,669,669,669,670,670,670,670,671,671,671,671,671,671,671,
+604,604,604,604,604,604,604,604,604,604,603,603,604,604,604,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,
+604,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,111,116,116,
/* block 141 */
-671,671,671,671,671,671,671,671,671,671,670,670,671,671,671,119,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,119,119,119,119,
-671,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,112,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 142 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
+605,605,605,605,605,605,605,605,605,605,605,605,605,116,116,116,
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
+606,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+111, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,116,116,116,116,
/* block 143 */
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,119,119,119,
-673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
-673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
-673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
-673,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-674,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
-675,675,675,675,675,675,675,675,675,675,675,675,119,119,119,119,
+607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,
+607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,
+608,608,608,608,116,116,116,116,116,116,116,116,116,607,607,607,
+609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,
+609,610,609,609,609,609,609,609,609,609,610,116,116,116,116,116,
+611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
+611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
+611,611,611,611,611,611,612,612,612,612,612,116,116,116,116,116,
/* block 144 */
-676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,
-676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,
-677,677,677,677,119,119,119,119,119,119,119,119,119,676,676,676,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,679,678,678,678,678,678,678,678,678,679,119,119,119,119,119,
-680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,
-680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,
-680,680,680,680,680,680,681,681,681,681,681,119,119,119,119,119,
+613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,
+613,613,613,613,613,613,613,613,613,613,613,613,613,613,116,614,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,116,116,116,116,615,615,615,615,615,615,615,615,
+616,617,617,617,617,617,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 145 */
-682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,
-682,682,682,682,682,682,682,682,682,682,682,682,682,682,119,683,
-684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,
-684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,
-684,684,684,684,119,119,119,119,684,684,684,684,684,684,684,684,
-685,686,686,686,686,686,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
/* block 146 */
-687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
-687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
-687,687,687,687,687,687,687,687,688,688,688,688,688,688,688,688,
-688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,
-688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,
-689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
-689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
-689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
+621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
+621,621,621,621,621,621,621,621,621,621,621,621,621,621,116,116,
+622,622,622,622,622,622,622,622,622,622,116,116,116,116,116,116,
+623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,
+623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,
+623,623,623,623,116,116,116,116,624,624,624,624,624,624,624,624,
+624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,
+624,624,624,624,624,624,624,624,624,624,624,624,116,116,116,116,
/* block 147 */
-690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,
-690,690,690,690,690,690,690,690,690,690,690,690,690,690,119,119,
-691,691,691,691,691,691,691,691,691,691,119,119,119,119,119,119,
-692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,
-692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,
-692,692,692,692,119,119,119,119,693,693,693,693,693,693,693,693,
-693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
-693,693,693,693,693,693,693,693,693,693,693,693,119,119,119,119,
+625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
+625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
+625,625,625,625,625,625,625,625,116,116,116,116,116,116,116,116,
+626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+626,626,626,626,116,116,116,116,116,116,116,116,116,116,116,627,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 148 */
-694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,
-694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,
-694,694,694,694,694,694,694,694,119,119,119,119,119,119,119,119,
-695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
-695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
-695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
-695,695,695,695,119,119,119,119,119,119,119,119,119,119,119,696,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
/* block 149 */
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,116,116,116,116,116,116,116,116,116,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,116,116,116,116,116,116,116,116,116,116,
+628,628,628,628,628,628,628,628,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 150 */
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,119,119,119,119,119,119,119,119,119,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,119,119,119,119,119,119,119,119,119,119,
-697,697,697,697,697,697,697,697,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+629,629,629,629,629,629,116,116,629,116,629,629,629,629,629,629,
+629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
+629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
+629,629,629,629,629,629,116,629,629,116,116,116,629,116,116,629,
+630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
+630,630,630,630,630,630,116,631,632,632,632,632,632,632,632,632,
+633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,
+633,633,633,633,633,633,633,634,634,635,635,635,635,635,635,635,
/* block 151 */
-698,698,698,698,698,698,119,119,698,119,698,698,698,698,698,698,
-698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,
-698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,
-698,698,698,698,698,698,119,698,698,119,119,119,698,119,119,698,
-699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,
-699,699,699,699,699,699,119,700,701,701,701,701,701,701,701,701,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
-702,702,702,702,702,702,702,703,703,704,704,704,704,704,704,704,
+636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,
+636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,116,
+116,116,116,116,116,116,116,637,637,637,637,637,637,637,637,637,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,
+638,638,638,116,638,638,116,116,116,116,116,639,639,639,639,639,
/* block 152 */
-705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,
-705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,119,
-119,119,119,119,119,119,119,706,706,706,706,706,706,706,706,706,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
-707,707,707,119,707,707,119,119,119,119,119,708,708,708,708,708,
+640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,
+640,640,640,640,640,640,641,641,641,641,641,641,116,116,116,642,
+643,643,643,643,643,643,643,643,643,643,643,643,643,643,643,643,
+643,643,643,643,643,643,643,643,643,643,116,116,116,116,116,644,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 153 */
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,710,710,710,710,710,710,119,119,119,711,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,119,119,119,119,119,713,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,
+645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,
+646,646,646,646,646,646,646,646,646,646,646,646,646,646,646,646,
+646,646,646,646,646,646,646,646,116,116,116,116,647,647,646,646,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+116,116,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
/* block 154 */
-714,714,714,714,714,714,714,714,714,714,714,714,714,714,714,714,
-714,714,714,714,714,714,714,714,714,714,714,714,714,714,714,714,
-715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
-715,715,715,715,715,715,715,715,119,119,119,119,716,716,715,715,
-716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,
-119,119,716,716,716,716,716,716,716,716,716,716,716,716,716,716,
-716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,
-716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,
+648,649,649,649,116,649,649,116,116,116,116,116,649,649,649,649,
+648,648,648,648,116,648,648,648,116,648,648,648,648,648,648,648,
+648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
+648,648,648,648,648,648,116,116,649,649,649,116,116,116,116,649,
+650,650,650,650,650,650,650,650,650,116,116,116,116,116,116,116,
+651,651,651,651,651,651,651,651,651,116,116,116,116,116,116,116,
+652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,
+652,652,652,652,652,652,652,652,652,652,652,652,652,653,653,654,
/* block 155 */
-717,718,718,718,119,718,718,119,119,119,119,119,718,718,718,718,
-717,717,717,717,119,717,717,717,119,717,717,717,717,717,717,717,
-717,717,717,717,717,717,717,717,717,717,717,717,717,717,717,717,
-717,717,717,717,717,717,119,119,718,718,718,119,119,119,119,718,
-719,719,719,719,719,719,719,719,719,119,119,119,119,119,119,119,
-720,720,720,720,720,720,720,720,720,119,119,119,119,119,119,119,
-721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,
-721,721,721,721,721,721,721,721,721,721,721,721,721,722,722,723,
+655,655,655,655,655,655,655,655,655,655,655,655,655,655,655,655,
+655,655,655,655,655,655,655,655,655,655,655,655,655,656,656,656,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+657,657,657,657,657,657,657,657,658,657,657,657,657,657,657,657,
+657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
+657,657,657,657,657,659,659,116,116,116,116,660,660,660,660,660,
+661,661,661,661,661,661,661,116,116,116,116,116,116,116,116,116,
/* block 156 */
-724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,
-724,724,724,724,724,724,724,724,724,724,724,724,724,725,725,725,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-726,726,726,726,726,726,726,726,727,726,726,726,726,726,726,726,
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,728,728,119,119,119,119,729,729,729,729,729,
-730,730,730,730,730,730,730,119,119,119,119,119,119,119,119,119,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,116,116,116,663,663,663,663,663,663,663,
+664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
+664,664,664,664,664,664,116,116,665,665,665,665,665,665,665,665,
+666,666,666,666,666,666,666,666,666,666,666,666,666,666,666,666,
+666,666,666,116,116,116,116,116,667,667,667,667,667,667,667,667,
/* block 157 */
-731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
-731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
-731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
-731,731,731,731,731,731,119,119,119,732,732,732,732,732,732,732,
-733,733,733,733,733,733,733,733,733,733,733,733,733,733,733,733,
-733,733,733,733,733,733,119,119,734,734,734,734,734,734,734,734,
-735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,
-735,735,735,119,119,119,119,119,736,736,736,736,736,736,736,736,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
+668,668,116,116,116,116,116,116,116,669,669,669,669,116,116,116,
+116,116,116,116,116,116,116,116,116,670,670,670,670,670,670,670,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 158 */
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,119,119,119,119,119,119,119,738,738,738,738,119,119,119,
-119,119,119,119,119,119,119,119,119,739,739,739,739,739,739,739,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 159 */
-740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,
-740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,
-740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,
-740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,
-740,740,740,740,740,740,740,740,740,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,116,116,116,116,116,116,116,116,116,116,116,116,116,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,116,116,116,116,116,116,116,674,674,674,674,674,674,
/* block 160 */
-741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,
-741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,
-741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,
-741,741,741,119,119,119,119,119,119,119,119,119,119,119,119,119,
-742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
-742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
-742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
-742,742,742,119,119,119,119,119,119,119,743,743,743,743,743,743,
+675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
+675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
+675,675,675,675,676,676,676,676,116,116,116,116,116,116,116,116,
+677,677,677,677,677,677,677,677,677,677,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 161 */
-744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,
-744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,
-744,744,744,744,745,745,745,745,119,119,119,119,119,119,119,119,
-746,746,746,746,746,746,746,746,746,746,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
+678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,116,
/* block 162 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,
-747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,119,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,680,680,680,
+680,680,680,680,680,680,680,679,116,116,116,116,116,116,116,116,
+681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,
+681,681,681,681,681,681,682,682,682,682,682,682,682,682,682,682,
+682,683,683,683,683,684,684,684,684,684,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 163 */
-748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,
-748,748,748,748,748,748,748,748,748,748,748,748,748,749,749,749,
-749,749,749,749,749,749,749,748,119,119,119,119,119,119,119,119,
-750,750,750,750,750,750,750,750,750,750,750,750,750,750,750,750,
-750,750,750,750,750,750,751,751,751,751,751,751,751,751,751,751,
-751,752,752,752,752,753,753,753,753,753,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+685,686,685,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,686,686,686,686,686,686,686,686,
+686,686,686,686,686,686,686,688,688,688,688,688,688,688,116,116,
+116,116,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
+689,689,689,689,689,689,690,690,690,690,690,690,690,690,690,690,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,686,
/* block 164 */
-754,755,754,756,756,756,756,756,756,756,756,756,756,756,756,756,
-756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,
-756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,
-756,756,756,756,756,756,756,756,755,755,755,755,755,755,755,755,
-755,755,755,755,755,755,755,757,757,757,757,757,757,757,119,119,
-119,119,758,758,758,758,758,758,758,758,758,758,758,758,758,758,
-758,758,758,758,758,758,759,759,759,759,759,759,759,759,759,759,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,755,
+691,691,692,693,693,693,693,693,693,693,693,693,693,693,693,693,
+693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
+693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
+692,692,692,691,691,691,691,692,692,691,691,694,694,695,694,694,
+694,694,116,116,116,116,116,116,116,116,116,116,116,695,116,116,
+696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,
+696,696,696,696,696,696,696,696,696,116,116,116,116,116,116,116,
+697,697,697,697,697,697,697,697,697,697,116,116,116,116,116,116,
/* block 165 */
-760,760,761,762,762,762,762,762,762,762,762,762,762,762,762,762,
-762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,
-762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,
-761,761,761,760,760,760,760,761,761,760,760,763,763,764,763,763,
-763,763,119,119,119,119,119,119,119,119,119,119,119,764,119,119,
-765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,
-765,765,765,765,765,765,765,765,765,119,119,119,119,119,119,119,
-766,766,766,766,766,766,766,766,766,766,119,119,119,119,119,119,
+698,698,698,699,699,699,699,699,699,699,699,699,699,699,699,699,
+699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,
+699,699,699,699,699,699,699,698,698,698,698,698,700,698,698,698,
+698,698,698,698,698,116,701,701,701,701,701,701,701,701,701,701,
+702,702,702,702,699,700,700,116,116,116,116,116,116,116,116,116,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
+703,703,703,704,705,705,703,116,116,116,116,116,116,116,116,116,
/* block 166 */
-767,767,767,768,768,768,768,768,768,768,768,768,768,768,768,768,
-768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
-768,768,768,768,768,768,768,767,767,767,767,767,769,767,767,767,
-767,767,767,767,767,119,770,770,770,770,770,770,770,770,770,770,
-771,771,771,771,768,769,769,119,119,119,119,119,119,119,119,119,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,773,774,774,772,119,119,119,119,119,119,119,119,119,
+706,706,707,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,707,707,707,706,706,706,706,706,706,706,706,706,707,
+707,708,709,709,708,710,710,710,710,706,706,706,706,710,116,116,
+711,711,711,711,711,711,711,711,711,711,708,710,708,710,710,710,
+116,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
+712,712,712,712,712,116,116,116,116,116,116,116,116,116,116,116,
/* block 167 */
-775,775,776,777,777,777,777,777,777,777,777,777,777,777,777,777,
-777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,
-777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,
-777,777,777,776,776,776,775,775,775,775,775,775,775,775,775,776,
-776,777,778,778,777,779,779,779,779,775,775,775,775,779,119,119,
-780,780,780,780,780,780,780,780,780,780,777,779,777,779,779,779,
-119,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,
-781,781,781,781,781,119,119,119,119,119,119,119,119,119,119,119,
+713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,
+713,713,116,713,713,713,713,713,713,713,713,713,713,713,713,713,
+713,713,713,713,713,713,713,713,713,713,713,713,714,714,714,715,
+715,715,714,714,715,714,715,715,716,716,716,716,716,716,715,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 168 */
-782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,
-782,782,119,782,782,782,782,782,782,782,782,782,782,782,782,782,
-782,782,782,782,782,782,782,782,782,782,782,782,783,783,783,784,
-784,784,783,783,784,783,784,784,785,785,785,785,785,785,784,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+717,717,717,717,717,717,717,116,717,116,717,717,717,717,116,717,
+717,717,717,717,717,717,717,717,717,717,717,717,717,717,116,717,
+717,717,717,717,717,717,717,717,717,718,116,116,116,116,116,116,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,720,
+721,721,721,720,720,720,720,720,720,720,720,116,116,116,116,116,
+722,722,722,722,722,722,722,722,722,722,116,116,116,116,116,116,
/* block 169 */
-786,786,786,786,786,786,786,119,786,119,786,786,786,786,119,786,
-786,786,786,786,786,786,786,786,786,786,786,786,786,786,119,786,
-786,786,786,786,786,786,786,786,786,787,119,119,119,119,119,119,
-788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,
-788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,
-788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,789,
-790,790,790,789,789,789,789,789,789,789,789,119,119,119,119,119,
-791,791,791,791,791,791,791,791,791,791,119,119,119,119,119,119,
+723,723,724,724,116,725,725,725,725,725,725,725,725,116,116,725,
+725,116,116,725,725,725,725,725,725,725,725,725,725,725,725,725,
+725,725,725,725,725,725,725,725,725,116,725,725,725,725,725,725,
+725,116,725,725,116,725,725,725,725,725,116,111,723,725,726,724,
+723,724,724,724,724,116,116,724,724,116,116,724,724,724,116,116,
+725,116,116,116,116,116,116,726,116,116,116,116,116,725,725,725,
+725,725,724,724,116,116,723,723,723,723,723,723,723,116,116,116,
+723,723,723,723,723,116,116,116,116,116,116,116,116,116,116,116,
/* block 170 */
-792,793,794,795,119,796,796,796,796,796,796,796,796,119,119,796,
-796,119,119,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,119,796,796,796,796,796,796,
-796,119,796,796,119,796,796,796,796,796,119,797,793,796,798,794,
-792,794,794,794,794,119,119,794,794,119,119,794,794,794,119,119,
-796,119,119,119,119,119,119,798,119,119,119,119,119,796,796,796,
-796,796,794,794,119,119,792,792,792,792,792,792,792,119,119,119,
-792,792,792,792,792,119,119,119,119,119,119,119,119,119,119,119,
+727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
+727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
+727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
+727,727,727,727,727,728,728,728,729,729,729,729,729,729,729,729,
+728,728,729,729,729,728,729,727,727,727,727,730,730,730,730,730,
+731,731,731,731,731,731,731,731,731,731,116,730,116,730,729,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 171 */
-799,799,799,799,799,799,799,799,799,799,799,799,799,799,799,799,
-799,799,799,799,799,799,799,799,799,799,799,799,799,799,799,799,
-799,799,799,799,799,799,799,799,799,799,799,799,799,799,799,799,
-799,799,799,799,799,800,800,800,801,801,801,801,801,801,801,801,
-800,800,801,801,801,800,801,799,799,799,799,802,802,802,802,802,
-803,803,803,803,803,803,803,803,803,803,119,802,119,802,801,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,
+732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,
+732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,
+733,734,734,735,735,735,735,735,735,734,735,734,734,733,734,735,
+735,734,735,735,732,732,736,732,116,116,116,116,116,116,116,116,
+737,737,737,737,737,737,737,737,737,737,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 172 */
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-805,806,806,807,807,807,807,807,807,806,807,806,806,805,806,807,
-807,806,807,807,804,804,808,804,119,119,119,119,119,119,119,119,
-809,809,809,809,809,809,809,809,809,809,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,739,
+740,740,741,741,741,741,116,116,740,740,740,740,741,741,740,741,
+741,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
+742,742,742,742,742,742,742,742,738,738,738,738,741,741,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 173 */
-810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,
-810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,
-810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,811,
-812,812,813,813,813,813,119,119,812,812,812,812,813,813,812,813,
-813,814,814,814,814,814,814,814,814,814,814,814,814,814,814,814,
-814,814,814,814,814,814,814,814,810,810,810,810,813,813,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
+743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
+743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
+744,744,744,745,745,745,745,745,745,745,745,744,744,745,744,745,
+745,746,746,746,743,116,116,116,116,116,116,116,116,116,116,116,
+747,747,747,747,747,747,747,747,747,747,116,116,116,116,116,116,
+373,373,373,373,373,373,373,373,373,373,373,373,373,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 174 */
-815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,
-815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,
-815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,
-816,816,816,817,817,817,817,817,817,817,817,816,816,817,816,817,
-817,818,818,818,815,119,119,119,119,119,119,119,119,119,119,119,
-819,819,819,819,819,819,819,819,819,819,119,119,119,119,119,119,
-392,392,392,392,392,392,392,392,392,392,392,392,392,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,
+748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,
+748,748,748,748,748,748,748,748,748,748,748,749,750,749,750,750,
+749,749,749,749,749,749,750,749,116,116,116,116,116,116,116,116,
+751,751,751,751,751,751,751,751,751,751,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 175 */
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,821,822,821,822,822,
-821,821,821,821,821,821,822,821,119,119,119,119,119,119,119,119,
-823,823,823,823,823,823,823,823,823,823,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,
+752,752,752,752,752,752,752,752,752,752,752,116,116,753,753,753,
+754,754,753,753,753,753,754,753,753,753,753,753,116,116,116,116,
+755,755,755,755,755,755,755,755,755,755,756,756,757,757,757,758,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 176 */
-824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
-824,824,824,824,824,824,824,824,824,824,824,119,119,825,825,825,
-826,826,825,825,825,825,826,825,825,825,825,825,119,119,119,119,
-827,827,827,827,827,827,827,827,827,827,828,828,829,829,829,830,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,
+759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,
+759,759,759,759,759,759,759,759,759,759,759,759,760,760,760,761,
+761,761,761,761,761,761,761,761,760,761,761,762,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 177 */
-831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
-831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
-831,831,831,831,831,831,831,831,831,831,831,831,832,832,832,833,
-833,833,833,833,833,833,833,833,832,833,833,834,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,
+763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,
+764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,
+764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,
+765,765,765,765,765,765,765,765,765,765,766,766,766,766,766,766,
+766,766,766,116,116,116,116,116,116,116,116,116,116,116,116,767,
/* block 178 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,
-835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,
-836,836,836,836,836,836,836,836,836,836,836,836,836,836,836,836,
-836,836,836,836,836,836,836,836,836,836,836,836,836,836,836,836,
-837,837,837,837,837,837,837,837,837,837,838,838,838,838,838,838,
-838,838,838,119,119,119,119,119,119,119,119,119,119,119,119,839,
+768,769,769,769,769,769,769,769,769,769,769,768,768,768,768,768,
+768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
+768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
+768,768,768,769,769,769,769,769,769,770,771,769,769,769,769,772,
+772,772,772,772,772,772,772,769,116,116,116,116,116,116,116,116,
+773,774,774,774,774,774,774,775,775,774,774,774,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
/* block 179 */
-840,841,841,841,841,841,841,841,841,841,841,840,840,840,840,840,
-840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,
-840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,
-840,840,840,841,841,841,841,841,841,842,843,841,841,841,841,844,
-844,844,844,844,844,844,844,841,119,119,119,119,119,119,119,119,
-845,846,846,846,846,846,846,847,847,846,846,846,845,845,845,845,
-845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
-845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
+773,773,773,773,116,116,776,776,776,776,774,774,774,774,774,774,
+774,774,774,774,774,774,774,775,774,774,777,777,777,773,777,777,
+777,777,777,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,778,778,778,116,116,116,116,116,116,116,
/* block 180 */
-845,845,845,845,119,119,848,848,848,848,846,846,846,846,846,846,
-846,846,846,846,846,846,846,847,846,846,849,849,849,845,849,849,
-849,849,849,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
-850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
-850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
-850,850,850,850,850,850,850,850,850,119,119,119,119,119,119,119,
+779,779,779,779,779,779,779,779,779,116,779,779,779,779,779,779,
+779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,
+779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,780,
+781,781,781,781,781,781,781,116,781,781,781,781,781,781,780,781,
+779,782,782,782,782,782,116,116,116,116,116,116,116,116,116,116,
+783,783,783,783,783,783,783,783,783,783,784,784,784,784,784,784,
+784,784,784,784,784,784,784,784,784,784,784,784,784,116,116,116,
+785,785,786,786,786,786,786,786,786,786,786,786,786,786,786,786,
/* block 181 */
-851,851,851,851,851,851,851,851,851,119,851,851,851,851,851,851,
-851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,
-851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,852,
-853,853,853,853,853,853,853,119,853,853,853,853,853,853,852,853,
-851,854,854,854,854,854,119,119,119,119,119,119,119,119,119,119,
-855,855,855,855,855,855,855,855,855,855,856,856,856,856,856,856,
-856,856,856,856,856,856,856,856,856,856,856,856,856,119,119,119,
-857,857,858,858,858,858,858,858,858,858,858,858,858,858,858,858,
+786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,
+116,116,787,787,787,787,787,787,787,787,787,787,787,787,787,787,
+787,787,787,787,787,787,787,787,116,788,787,787,787,787,787,787,
+787,788,787,787,788,787,787,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 182 */
-858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,
-119,119,859,859,859,859,859,859,859,859,859,859,859,859,859,859,
-859,859,859,859,859,859,859,859,119,860,859,859,859,859,859,859,
-859,860,859,859,860,859,859,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+789,789,789,789,789,789,789,116,789,789,116,789,789,789,789,789,
+789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,
+789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,
+789,790,790,790,790,790,790,116,116,116,790,116,790,790,116,790,
+790,790,790,790,790,790,791,790,116,116,116,116,116,116,116,116,
+792,792,792,792,792,792,792,792,792,792,116,116,116,116,116,116,
+793,793,793,793,793,793,116,793,793,116,793,793,793,793,793,793,
+793,793,793,793,793,793,793,793,793,793,793,793,793,793,793,793,
/* block 183 */
-861,861,861,861,861,861,861,119,861,861,119,861,861,861,861,861,
-861,861,861,861,861,861,861,861,861,861,861,861,861,861,861,861,
-861,861,861,861,861,861,861,861,861,861,861,861,861,861,861,861,
-861,862,862,862,862,862,862,119,119,119,862,119,862,862,119,862,
-862,862,862,862,862,862,863,862,119,119,119,119,119,119,119,119,
-864,864,864,864,864,864,864,864,864,864,119,119,119,119,119,119,
-865,865,865,865,865,865,119,865,865,119,865,865,865,865,865,865,
-865,865,865,865,865,865,865,865,865,865,865,865,865,865,865,865,
+793,793,793,793,793,793,793,793,793,793,794,794,794,794,794,116,
+795,795,116,794,794,795,794,795,793,116,116,116,116,116,116,116,
+796,796,796,796,796,796,796,796,796,796,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 184 */
-865,865,865,865,865,865,865,865,865,865,866,866,866,866,866,119,
-867,867,119,866,866,867,866,867,865,119,119,119,119,119,119,119,
-868,868,868,868,868,868,868,868,868,868,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,798,798,799,799,800,800,116,116,116,116,116,116,116,
/* block 185 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-869,869,869,869,869,869,869,869,869,869,869,869,869,869,869,869,
-869,869,869,870,870,871,871,872,872,119,119,119,119,119,119,119,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
/* block 186 */
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 187 */
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,116,
+803,803,803,803,803,116,116,116,116,116,116,116,116,116,116,116,
/* block 188 */
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,874,874,119,
-875,875,875,875,875,119,119,119,119,119,119,119,119,119,119,119,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 189 */
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,873,
-873,873,873,873,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
/* block 190 */
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 191 */
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
/* block 192 */
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 193 */
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
/* block 194 */
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,116,116,116,116,116,116,116,
+806,806,806,806,806,806,806,806,806,806,806,806,806,806,806,806,
+806,806,806,806,806,806,806,806,806,806,806,806,806,806,806,116,
+807,807,807,807,807,807,807,807,807,807,116,116,116,116,808,808,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 195 */
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
-592,592,592,592,592,592,592,592,592,119,119,119,119,119,119,119,
-878,878,878,878,878,878,878,878,878,878,878,878,878,878,878,878,
-878,878,878,878,878,878,878,878,878,878,878,878,878,878,878,119,
-879,879,879,879,879,879,879,879,879,879,119,119,119,119,880,880,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+809,809,809,809,809,809,809,809,809,809,809,809,809,809,809,809,
+809,809,809,809,809,809,809,809,809,809,809,809,809,809,116,116,
+810,810,810,810,810,811,116,116,116,116,116,116,116,116,116,116,
/* block 196 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,
-881,881,881,881,881,881,881,881,881,881,881,881,881,881,119,119,
-882,882,882,882,882,883,119,119,119,119,119,119,119,119,119,119,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+813,813,813,813,813,813,813,814,814,814,814,814,815,815,815,815,
+816,816,816,816,814,815,116,116,116,116,116,116,116,116,116,116,
+817,817,817,817,817,817,817,817,817,817,116,818,818,818,818,818,
+818,818,116,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,116,116,116,116,116,812,812,812,
/* block 197 */
-884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,
-884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,
-884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,
-885,885,885,885,885,885,885,886,886,886,886,886,887,887,887,887,
-888,888,888,888,886,887,119,119,119,119,119,119,119,119,119,119,
-889,889,889,889,889,889,889,889,889,889,119,890,890,890,890,890,
-890,890,119,884,884,884,884,884,884,884,884,884,884,884,884,884,
-884,884,884,884,884,884,884,884,119,119,119,119,119,884,884,884,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 198 */
-884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,884,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
/* block 199 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,
-891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,
-892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,
-892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,
+821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,
+821,821,821,821,821,821,821,822,822,822,822,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 200 */
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,894,894,894,894,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,116,116,116,116,116,116,116,116,116,116,116,
+823,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
+824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
+824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,116,
/* block 201 */
-895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,
-895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,
-895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,
-895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,
-895,895,895,895,895,119,119,119,119,119,119,119,119,119,119,119,
-895,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,825,
+825,825,825,826,826,826,826,826,826,826,826,826,826,826,826,826,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+827,828,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 202 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,897,
-897,897,897,898,898,898,898,898,898,898,898,898,898,898,898,898,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-899,900,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
/* block 203 */
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 204 */
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 205 */
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,
-901,901,901,119,119,119,119,119,119,119,119,119,119,119,119,119,
+517,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 206 */
-569,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 207 */
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
/* block 208 */
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
/* block 209 */
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,116,116,116,116,
/* block 210 */
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,119,119,119,119,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,116,116,116,116,116,
+831,831,831,831,831,831,831,831,831,831,831,831,831,116,116,116,
/* block 211 */
-903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,
-903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,
-903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,
-903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,
-903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,
-903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,
-903,903,903,903,903,903,903,903,903,903,903,119,119,119,119,119,
-903,903,903,903,903,903,903,903,903,903,903,903,903,119,119,119,
+831,831,831,831,831,831,831,831,831,116,116,116,116,116,116,116,
+831,831,831,831,831,831,831,831,831,831,116,116,832,833,833,834,
+ 23, 23, 23, 23,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 212 */
-903,903,903,903,903,903,903,903,903,119,119,119,119,119,119,119,
-903,903,903,903,903,903,903,903,903,903,119,119,904,905,905,906,
-907,907,907,907,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,116,
/* block 213 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20,119,119,119,119,119,119,119,119,119,119,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,116,116, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19,835,438,111,111,111, 19, 19, 19,438,835,835,
+835,835,835, 23, 23, 23, 23, 23, 23, 23, 23,111,111,111,111,111,
/* block 214 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20,119,119, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20,908,909,112,112,112, 20, 20, 20,909,908,908,
-908,908,908, 24, 24, 24, 24, 24, 24, 24, 24,112,112,112,112,112,
+111,111,111, 19, 19,111,111,111,111,111,111,111, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,111,111,111,111, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 215 */
-112,112,112, 20, 20,112,112,112,112,112,112,112, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,112,112,112,112, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,836,836,836,604,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 216 */
-671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
-671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
-671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
-671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
-671,671,910,910,910,671,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 217 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25,119,119,119,119,119,119,119,119,119,119,119,119,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24,116,116,116,116,116,116,116,
/* block 218 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20,119,119,119,119,119,119,119,119,119,
-573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
-573,573, 25, 25, 25, 25, 25, 25, 25,119,119,119,119,119,119,119,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,473,473,
+473,473,473,473,473,116,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
/* block 219 */
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,505,505,
-505,505,505,505,505,119,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
+472,472,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,472,116,472,472,
+116,116,472,116,116,472,472,116,116,472,472,472,472,116,472,472,
+472,472,472,472,472,472,473,473,473,473,116,473,116,473,473,473,
+473,473,473,473,116,473,473,473,473,473,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
/* block 220 */
-504,504,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,504,119,504,504,
-119,119,504,119,119,504,504,119,119,504,504,504,504,119,504,504,
-504,504,504,504,504,504,505,505,505,505,119,505,119,505,505,505,
-505,505,505,505,119,505,505,505,505,505,505,505,505,505,505,505,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+473,473,473,473,472,472,116,472,472,472,472,116,116,472,472,472,
+472,472,472,472,472,116,472,472,472,472,472,472,472,116,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,472,472,116,472,472,472,472,116,
+472,472,472,472,472,116,472,116,116,116,472,472,472,472,472,472,
+472,116,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
/* block 221 */
-505,505,505,505,504,504,119,504,504,504,504,119,119,504,504,504,
-504,504,504,504,504,119,504,504,504,504,504,504,504,119,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,504,504,119,504,504,504,504,119,
-504,504,504,504,504,119,504,119,119,119,504,504,504,504,504,504,
-504,119,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
+472,472,472,472,472,472,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
/* block 222 */
-504,504,504,504,504,504,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+473,473,473,473,473,473,473,473,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
/* block 223 */
-505,505,505,505,505,505,505,505,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,116,116,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472, 8,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473, 8,473,473,473,473,
+473,473,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472, 8,473,473,473,473,
/* block 224 */
-504,504,504,504,504,504,504,504,504,504,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,119,119,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504, 9,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505, 9,505,505,505,505,
-505,505,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504, 9,505,505,505,505,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473, 8,473,473,473,473,473,473,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472, 8,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, 8,
+473,473,473,473,473,473,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, 8,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
/* block 225 */
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505, 9,505,505,505,505,505,505,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504, 9,505,505,505,505,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505, 9,
-505,505,505,505,505,505,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504, 9,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+473,473,473,473,473,473,473,473,473, 8,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472, 8,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473, 8,473,473,473,473,473,473,472,473,116,116, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
/* block 226 */
-505,505,505,505,505,505,505,505,505, 9,505,505,505,505,505,505,
-504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,504,
-504,504,504,504,504,504,504,504,504, 9,505,505,505,505,505,505,
-505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
-505,505,505, 9,505,505,505,505,505,505,504,505,119,119, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
/* block 227 */
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
-911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,837,837,837,837,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,837,837,837,
+837,837,837,837,837,838,837,837,837,837,837,837,837,837,837,837,
/* block 228 */
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-912,912,912,912,912,912,912,911,911,911,911,912,912,912,912,912,
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-912,912,912,912,912,912,912,912,912,912,912,912,912,911,911,911,
-911,911,911,911,911,912,911,911,911,911,911,911,911,911,911,911,
+837,837,837,837,838,837,837,839,839,839,839,839,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,838,838,838,838,838,
+116,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 229 */
-911,911,911,911,912,911,911,913,913,913,913,913,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,912,912,912,912,912,
-119,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+840,840,840,840,840,840,840,116,840,840,840,840,840,840,840,840,
+840,840,840,840,840,840,840,840,840,116,116,840,840,840,840,840,
+840,840,116,840,840,116,840,840,840,840,840,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 230 */
-914,914,914,914,914,914,914,119,914,914,914,914,914,914,914,914,
-914,914,914,914,914,914,914,914,914,119,119,914,914,914,914,914,
-914,914,119,914,914,119,914,914,914,914,914,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
/* block 231 */
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,116,116,842,842,842,842,842,842,842,842,842,
+843,843,843,843,843,843,843,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 232 */
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
-915,915,915,915,915,119,119,916,916,916,916,916,916,916,916,916,
-917,917,917,917,917,917,917,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,
+844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,
+844,844,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
+845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
+845,845,845,845,846,846,846,846,846,846,846,116,116,116,116,116,
+847,847,847,847,847,847,847,847,847,847,116,116,116,116,848,848,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 233 */
-918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
-918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
-918,918,919,919,919,919,919,919,919,919,919,919,919,919,919,919,
-919,919,919,919,919,919,919,919,919,919,919,919,919,919,919,919,
-919,919,919,919,920,920,920,920,920,920,920,119,119,119,119,119,
-921,921,921,921,921,921,921,921,921,921,119,119,119,119,922,922,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 234 */
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 24, 24, 24,
+ 5, 24, 24, 24, 24,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 235 */
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 25, 25, 25,
- 6, 25, 25, 25, 25,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+217,217,217,217,116,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+116,217,217,116,217,116,116,217,116,217,217,217,217,217,217,217,
+217,217,217,116,217,217,217,217,116,217,116,217,116,116,116,116,
+116,116,217,116,116,116,116,217,116,217,116,217,116,217,217,217,
+116,217,217,116,217,116,116,217,116,217,116,217,116,217,116,217,
+116,217,217,116,217,116,116,217,217,217,217,116,217,217,217,217,
+217,217,217,116,217,217,217,217,116,217,217,217,217,116,217,116,
/* block 236 */
-224,224,224,224,119,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-119,224,224,119,224,119,119,224,119,224,224,224,224,224,224,224,
-224,224,224,119,224,224,224,224,119,224,119,224,119,119,119,119,
-119,119,224,119,119,119,119,224,119,224,119,224,119,224,224,224,
-119,224,224,119,224,119,119,224,119,224,119,224,119,224,119,224,
-119,224,224,119,224,119,119,224,224,224,224,119,224,224,224,224,
-224,224,224,119,224,224,224,224,119,224,224,224,224,119,224,119,
+217,217,217,217,217,217,217,217,217,217,116,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,116,116,116,116,
+116,217,217,217,116,217,217,217,217,217,116,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+211,211,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 237 */
-224,224,224,224,224,224,224,224,224,224,119,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,119,119,119,119,
-119,224,224,224,119,224,224,224,224,224,119,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-217,217,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 238 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,
+849, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+849, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+849, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,
/* block 239 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21,923,923,923,923,923,923,923,923,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,923,
-923, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-923, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-923, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21,923,923,923,923,923,923,923,923,923,923,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,
+ 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20,
/* block 240 */
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,923,923,923,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,
- 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 19,
+ 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,850,850,850,850,850,850,850,850,850,850,
+850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
/* block 241 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 20,
- 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
+851, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
+ 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,849,
+ 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 242 */
-925, 21, 21,923,923,923,923,923,923,923,923,923,923,923,923,923,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
- 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20,923,923,923,923,
- 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,923,923,923,
-575,575,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
- 21, 21, 21, 21, 21, 21,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 243 */
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 244 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-
-/* block 245 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,926,926,926,926,926,
-
-/* block 246 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20,
- 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-
-/* block 247 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-
-/* block 248 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21,923,923,923,923,923,923,923,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,923,923,923,923,923,923,
-
-/* block 249 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,852,852,852,852,852,
+
+/* block 245 */
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19,
+ 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20,923,923,923,923,923,923,923,923,923,923,923,923,
-/* block 250 */
+/* block 246 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 21, 21, 21, 21,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 251 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,
+/* block 247 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,923,923,923,923,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,923,923,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,
+
+/* block 248 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,849,849,849,849,849,849,849,849,849,849,849,849,
+
+/* block 249 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 20, 20, 20, 20,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+
+/* block 250 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+
+/* block 251 */
+ 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 252 */
- 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,923,923,923,923,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 20, 20, 20,849,
+ 20, 20, 20, 20, 20, 20, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20,849,849, 20, 20, 20, 20,849,849,849, 20,849, 20, 20, 20, 20,
/* block 253 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 21, 21, 21,923,
- 21, 21, 21, 21, 21, 21, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21,923,923, 21, 21, 21, 21,923,923,923, 21,923, 21, 21, 21, 21,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,
+ 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 254 */
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21,923,923,923,923,923,923,923,923,923,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,923,923,923,923,923,923,
- 21, 21, 21,923,923,923,923,923,923,923,923,923,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 255 */
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,116,116,
/* block 256 */
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,119,119,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 257 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,116,116,116,116,116,116,116,116,116,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
/* block 258 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,119,119,119,119,119,119,119,119,119,119,119,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
/* block 259 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,119,119,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
/* block 260 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 261 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 262 */
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
-577,577,577,577,577,577,577,577,577,577,577,577,577,577,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
-119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,
+471, 23,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
/* block 263 */
-502, 24,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
-927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
-927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
-927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
-927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
-927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
/* block 264 */
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
/* block 265 */
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
/* block 266 */
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,
-502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,502,
-
-/* block 267 */
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,659,
-659,659,659,659,659,659,659,659,659,659,659,659,659,659,119,119,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,116,116,
};
diff --git a/dist2/src/pcre2_ucp.h b/dist2/src/pcre2_ucp.h
index 483abd18..0c330edc 100644
--- a/dist2/src/pcre2_ucp.h
+++ b/dist2/src/pcre2_ucp.h
@@ -124,7 +124,6 @@ enum {
/* These are the script identifications. */
enum {
- ucp_Unknown,
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
diff --git a/dist2/src/pcre2_xclass.c b/dist2/src/pcre2_xclass.c
index 8b052be6..407d3f5b 100644
--- a/dist2/src/pcre2_xclass.c
+++ b/dist2/src/pcre2_xclass.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -85,10 +85,10 @@ if (c < 256)
if ((*data & XCL_HASPROP) == 0)
{
if ((*data & XCL_MAP) == 0) return negated;
- return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0;
+ return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
}
if ((*data & XCL_MAP) != 0 &&
- (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0)
+ (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
diff --git a/dist2/src/pcre2grep.c b/dist2/src/pcre2grep.c
index a3cc3ec2..d5f34c81 100644
--- a/dist2/src/pcre2grep.c
+++ b/dist2/src/pcre2grep.c
@@ -68,18 +68,12 @@ POSSIBILITY OF SUCH DAMAGE.
#undef WIN32
#endif
-#ifdef __VMS
-#include clidef
-#include descrip
-#include lib$routines
-#endif
-
#ifdef WIN32
#include <io.h> /* For _setmode() */
#include <fcntl.h> /* For _O_BINARY */
#endif
-#if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
+#ifdef SUPPORT_PCRE2GREP_CALLOUT
#ifdef WIN32
#include <process.h>
#else
@@ -579,6 +573,8 @@ status of 1, which is not helpful. To help with this problem, define a symbol
therein. */
#ifdef __VMS
+#include descrip
+#include lib$routines
char val_buf[4];
$DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
$DESCRIPTOR(sym_val, val_buf);
@@ -1137,11 +1133,7 @@ printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
#ifdef SUPPORT_PCRE2GREP_CALLOUT
-#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
-printf("All callout scripts in patterns are supported." STDOUT_NL);
-#else
-printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
-#endif
+printf("Callout scripts in patterns are supported." STDOUT_NL);
#else
printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
#endif
@@ -2025,10 +2017,10 @@ return printed;
* Parse and execute callout scripts *
*************************************************/
-/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
-string block and executes the program specified by the string. The string is a
-list of substrings separated by pipe characters. The first substring represents
-the executable name, and the following substrings specify the arguments:
+/* This function parses a callout string block and executes the
+program specified by the string. The string is a list of substrings
+separated by pipe characters. The first substring represents the
+executable name, and the following substrings specify the arguments:
program_name|param1|param2|...
@@ -2045,9 +2037,8 @@ follows:
dollar or $| replaced by a pipe character.
Alternatively, if string starts with pipe, the remainder is taken as an output
-string, same as --output. This is the only form that is supported if
-SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
-separate each callout, defaulting to newline.
+string, same as --output. In this case, --om-separator is used to separate each
+callout, defaulting to newline.
Example:
@@ -2075,8 +2066,6 @@ PCRE2_SPTR string = calloutptr->callout_string;
PCRE2_SPTR subject = calloutptr->subject;
PCRE2_SIZE *ovector = calloutptr->offset_vector;
PCRE2_SIZE capture_top = calloutptr->capture_top;
-
-#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
PCRE2_SIZE argsvectorlen = 2;
PCRE2_SIZE argslen = 1;
char *args;
@@ -2087,12 +2076,10 @@ char **argsvectorptr;
pid_t pid;
#endif
int result = 0;
-#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
(void)unused; /* Avoid compiler warning */
/* Only callout with strings are supported. */
-
if (string == NULL || length == 0) return 0;
/* If there's no command, output the remainder directly. */
@@ -2105,10 +2092,6 @@ if (*string == '|')
return 0;
}
-#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
-return 0;
-#else
-
/* Checking syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. */
@@ -2289,34 +2272,11 @@ while (length > 0)
*argsptr++ = '\0';
*argsvectorptr = NULL;
-/* Running an external command is system-dependent. Handle Windows and VMS as
-necessary, otherwise assume fork(). */
-
#ifdef WIN32
result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
-
-#elif defined __VMS
- {
- char cmdbuf[500];
- short i = 0;
- int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
- $DESCRIPTOR(cmd, cmdbuf);
-
- cmdbuf[0] = 0;
- while (argsvector[i])
- {
- strcat(cmdbuf, argsvector[i]);
- strcat(cmdbuf, " ");
- i++;
- }
- cmd.dsc$w_length = strlen(cmdbuf) - 1;
- status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
- if (!(status & 1)) result = 0;
- else result = retstat & 1 ? 0 : 1;
- }
-
-#else /* Neither Windows nor VMS */
+#else
pid = fork();
+
if (pid == 0)
{
(void)execv(argsvector[0], argsvector);
@@ -2325,7 +2285,7 @@ if (pid == 0)
}
else if (pid > 0)
(void)waitpid(pid, &result, 0);
-#endif /* End Windows/VMS/other handling */
+#endif
free(args);
free(argsvector);
@@ -2334,9 +2294,9 @@ free(argsvector);
continues) or non-zero (match fails). */
return result != 0;
-#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
}
-#endif /* SUPPORT_PCRE2GREP_CALLOUT */
+
+#endif
@@ -4342,7 +4302,6 @@ if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
EXIT:
#ifdef SUPPORT_PCRE2GREP_JIT
-pcre2_jit_free_unused_memory(NULL);
if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
#endif
diff --git a/dist2/src/pcre2posix.c b/dist2/src/pcre2posix.c
index 34a8d809..7b9f4774 100644
--- a/dist2/src/pcre2posix.c
+++ b/dist2/src/pcre2posix.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -40,12 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module is a wrapper that provides a POSIX API to the underlying PCRE2
-functions. The operative functions are called pcre2_regcomp(), etc., with
-wrappers that use the plain POSIX names. In addition, pcre2posix.h defines the
-POSIX names as macros for the pcre2_xxx functions, so any program that includes
-it and uses the POSIX names will call the base functions directly. This makes
-it easier for an application to be sure it gets the PCRE2 versions in the
-presence of other POSIX regex libraries. */
+functions. */
#ifdef HAVE_CONFIG_H
@@ -175,59 +170,13 @@ static const char *const pstring[] = {
-/*************************************************
-* Wrappers with traditional POSIX names *
-*************************************************/
-
-/* Keep defining them to preseve the ABI for applications linked to the pcre2
-POSIX library before these names were changed into macros in pcre2posix.h.
-This also ensures that the POSIX names are callable from languages that do not
-include pcre2posix.h. It is vital to #undef the macro definitions from
-pcre2posix.h! */
-
-#undef regerror
-PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
-PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
-regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
-{
-return pcre2_regerror(errcode, preg, errbuf, errbuf_size);
-}
-
-#undef regfree
-PCRE2POSIX_EXP_DECL void regfree(regex_t *);
-PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
-regfree(regex_t *preg)
-{
-pcre2_regfree(preg);
-}
-
-#undef regcomp
-PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
-PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
-regcomp(regex_t *preg, const char *pattern, int cflags)
-{
-return pcre2_regcomp(preg, pattern, cflags);
-}
-
-#undef regexec
-PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
- regmatch_t *, int);
-PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
-regexec(const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags)
-{
-return pcre2_regexec(preg, string, nmatch, pmatch, eflags);
-}
-
-
/*************************************************
* Translate error code to string *
*************************************************/
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
-pcre2_regerror(int errcode, const regex_t *preg, char *errbuf,
- size_t errbuf_size)
+regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
int used;
const char *message;
@@ -250,12 +199,13 @@ return used + 1;
+
/*************************************************
* Free store held by a regex *
*************************************************/
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
-pcre2_regfree(regex_t *preg)
+regfree(regex_t *preg)
{
pcre2_match_data_free(preg->re_match_data);
pcre2_code_free(preg->re_pcre2_code);
@@ -263,6 +213,7 @@ pcre2_code_free(preg->re_pcre2_code);
+
/*************************************************
* Compile a regular expression *
*************************************************/
@@ -278,7 +229,7 @@ Returns: 0 on success
*/
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_regcomp(regex_t *preg, const char *pattern, int cflags)
+regcomp(regex_t *preg, const char *pattern, int cflags)
{
PCRE2_SIZE erroffset;
PCRE2_SIZE patlen;
@@ -345,7 +296,7 @@ for each match. If REG_NOSUB was specified at compile time, the nmatch and
pmatch arguments are ignored, and the only result is yes/no/error. */
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch,
+regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
int rc, so, eo;
diff --git a/dist2/src/pcre2posix.h b/dist2/src/pcre2posix.h
index 3a663b9f..4ae1d3c2 100644
--- a/dist2/src/pcre2posix.h
+++ b/dist2/src/pcre2posix.h
@@ -3,13 +3,11 @@
*************************************************/
/* PCRE2 is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language. This is
-the public header file to be #included by applications that call PCRE2 via the
-POSIX wrapper interface.
+and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -138,30 +136,13 @@ file. */
# endif
#endif
-/* The functions. The actual code is in functions with pcre2_xxx names for
-uniqueness. POSIX names are provided as macros for API compatibility with POSIX
-regex functions. It's done this way to ensure to they are always linked from
-the PCRE2 library and not by accident from elsewhere (regex_t differs in size
-elsewhere). */
+/* The functions */
-PCRE2POSIX_EXP_DECL int pcre2_regcomp(regex_t *, const char *, int);
-PCRE2POSIX_EXP_DECL int pcre2_regexec(const regex_t *, const char *, size_t,
+PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
+PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
-PCRE2POSIX_EXP_DECL size_t pcre2_regerror(int, const regex_t *, char *, size_t);
-PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *);
-
-#define regcomp pcre2_regcomp
-#define regexec pcre2_regexec
-#define regerror pcre2_regerror
-#define regfree pcre2_regfree
-
-/* Debian had a patch that used different names. These are now here to save
-them having to maintain their own patch, but are not documented by PCRE2. */
-
-#define PCRE2regcomp pcre2_regcomp
-#define PCRE2regexec pcre2_regexec
-#define PCRE2regerror pcre2_regerror
-#define PCRE2regfree pcre2_regfree
+PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
+PCRE2POSIX_EXP_DECL void regfree(regex_t *);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/dist2/src/pcre2test.c b/dist2/src/pcre2test.c
index 40107728..8cfb8e91 100644
--- a/dist2/src/pcre2test.c
+++ b/dist2/src/pcre2test.c
@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam.
Written by Philip Hazel
Original code Copyright (c) 1997-2012 University of Cambridge
- Rewritten code Copyright (c) 2016-2019 University of Cambridge
+ Rewritten code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -157,22 +157,14 @@ patterns. */
#endif
#endif
-/* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
-user [2] provided alternative code which worked better for him. I have
-commented out the original, but kept it around just in case. */
-
#ifdef __VMS
#include <ssdef.h>
-/* These two includes came from [2]. */
-#include descrip
-#include lib$routines
-/* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
+void vms_setsymbol( char *, char *, int );
#endif
-/* VC and older compilers don't support %td or %zu, and even some that claim to
-be C99 don't support it (hence DISABLE_PERCENT_ZT). */
+/* VC and older compilers don't support %td or %zu. */
-#if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(DISABLE_PERCENT_ZT)
+#if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#define PTR_FORM "lu"
#define SIZ_FORM "lu"
#define SIZ_CAST (unsigned long int)
@@ -492,15 +484,13 @@ so many of them that they are split into two fields. */
/* Second control word */
-#define CTL2_SUBSTITUTE_CALLOUT 0x00000001u
-#define CTL2_SUBSTITUTE_EXTENDED 0x00000002u
-#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000004u
-#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000008u
-#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000010u
-#define CTL2_SUBJECT_LITERAL 0x00000020u
-#define CTL2_CALLOUT_NO_WHERE 0x00000040u
-#define CTL2_CALLOUT_EXTRA 0x00000080u
-#define CTL2_ALLVECTOR 0x00000100u
+#define CTL2_SUBSTITUTE_EXTENDED 0x00000001u
+#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u
+#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
+#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
+#define CTL2_SUBJECT_LITERAL 0x00000010u
+#define CTL2_CALLOUT_NO_WHERE 0x00000020u
+#define CTL2_CALLOUT_EXTRA 0x00000040u
#define CTL2_NL_SET 0x40000000u /* Informational */
#define CTL2_BSR_SET 0x80000000u /* Informational */
@@ -520,26 +510,22 @@ different things in the two cases. */
CTL_STARTCHAR|\
CTL_UTF8_INPUT)
-#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
- CTL2_SUBSTITUTE_EXTENDED|\
+#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
- CTL2_SUBSTITUTE_UNSET_EMPTY|\
- CTL2_ALLVECTOR)
+ CTL2_SUBSTITUTE_UNSET_EMPTY)
/* Structures for holding modifier information for patterns and subject strings
(data). Fields containing modifiers that can be set either for a pattern or a
subject must be at the start and in the same order in both cases so that the
same offset in the big table below works for both. */
-typedef struct patctl { /* Structure for pattern modifiers. */
- uint32_t options; /* Must be in same position as datctl */
- uint32_t control; /* Must be in same position as datctl */
- uint32_t control2; /* Must be in same position as datctl */
- uint32_t jitstack; /* Must be in same position as datctl */
+typedef struct patctl { /* Structure for pattern modifiers. */
+ uint32_t options; /* Must be in same position as datctl */
+ uint32_t control; /* Must be in same position as datctl */
+ uint32_t control2; /* Must be in same position as datctl */
+ uint32_t jitstack; /* Must be in same position as datctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
- uint32_t substitute_skip; /* Must be in same position as patctl */
- uint32_t substitute_stop; /* Must be in same position as patctl */
uint32_t jit;
uint32_t stackguard_test;
uint32_t tables_id;
@@ -554,14 +540,12 @@ typedef struct patctl { /* Structure for pattern modifiers. */
#define MAXCPYGET 10
#define LENCPYGET 64
-typedef struct datctl { /* Structure for data line modifiers. */
- uint32_t options; /* Must be in same position as patctl */
- uint32_t control; /* Must be in same position as patctl */
- uint32_t control2; /* Must be in same position as patctl */
- uint32_t jitstack; /* Must be in same position as patctl */
+typedef struct datctl { /* Structure for data line modifiers. */
+ uint32_t options; /* Must be in same position as patctl */
+ uint32_t control; /* Must be in same position as patctl */
+ uint32_t control2; /* Must be in same position as patctl */
+ uint32_t jitstack; /* Must be in same position as patctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
- uint32_t substitute_skip; /* Must be in same position as patctl */
- uint32_t substitute_stop; /* Must be in same position as patctl */
uint32_t startend[2];
uint32_t cerror[2];
uint32_t cfail[2];
@@ -608,7 +592,6 @@ static modstruct modlist[] = {
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
{ "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
- { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
{ "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
@@ -632,7 +615,6 @@ static modstruct modlist[] = {
{ "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
{ "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
- { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
{ "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
{ "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
{ "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
@@ -642,11 +624,9 @@ static modstruct modlist[] = {
{ "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
{ "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
{ "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
- { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
{ "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
{ "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
{ "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
- { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
{ "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
{ "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
{ "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
@@ -707,11 +687,8 @@ static modstruct modlist[] = {
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
{ "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
{ "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
- { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
{ "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
{ "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
- { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
- { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
{ "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
{ "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
@@ -911,7 +888,6 @@ static uint32_t forbid_utf = 0;
static uint32_t maxlookbehind;
static uint32_t max_oveccount;
static uint32_t callout_count;
-static uint32_t maxcapcount;
static uint16_t local_newline_default = 0;
@@ -1375,26 +1351,15 @@ are supported. */
else \
pcre2_set_parens_nest_limit_32(G(a,32),b)
-#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
- if (test_mode == PCRE8_MODE) \
- pcre2_set_substitute_callout_8(G(a,8), \
- (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
- else if (test_mode == PCRE16_MODE) \
- pcre2_set_substitute_callout_16(G(a,16), \
- (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
- else \
- pcre2_set_substitute_callout_32(G(a,32), \
- (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
-
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
if (test_mode == PCRE8_MODE) \
- a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
+ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
else if (test_mode == PCRE16_MODE) \
- a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
+ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
else \
- a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
+ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
@@ -1855,22 +1820,14 @@ the three different cases. */
else \
G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
-#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
- if (test_mode == G(G(PCRE,BITONE),_MODE)) \
- G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
- (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
- else \
- G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
- (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)
-
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
- G(g,BITONE),h,(G(PCRE2_SPTR,BITONE))i,j, \
+ G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
(G(PCRE2_UCHAR,BITONE) *)k,l); \
else \
a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
- G(g,BITTWO),h,(G(PCRE2_SPTR,BITTWO))i,j, \
+ G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
(G(PCRE2_UCHAR,BITTWO) *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
@@ -2064,11 +2021,8 @@ the three different cases. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
-#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
- pcre2_set_substitute_callout_8(G(a,8), \
- (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
- a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
+ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
@@ -2171,11 +2125,8 @@ the three different cases. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
-#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
- pcre2_set_substitute_callout_16(G(a,16), \
- (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
- a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
+ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
@@ -2266,7 +2217,7 @@ the three different cases. */
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
r = pcre2_serialize_get_number_of_codes_32(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
- pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
+ pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c);
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
@@ -2278,11 +2229,8 @@ the three different cases. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
-#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
- pcre2_set_substitute_callout_32(G(a,32), \
- (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
- a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
+ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
@@ -3050,14 +2998,13 @@ return yield;
+#ifdef SUPPORT_PCRE2_8
/*************************************************
* Convert character value to UTF-8 *
*************************************************/
/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes. It is needed even when the
-8-bit library is not supported, to generate UTF-8 output for non-ASCII
-characters.
+and encodes it as a UTF-8 character in 0 to 6 bytes.
Arguments:
cvalue the character value
@@ -3083,6 +3030,7 @@ for (j = i; j > 0; j--)
*utf8bytes = utf8_table2[i] | cvalue;
return i + 1;
}
+#endif /* SUPPORT_PCRE2_8 */
@@ -4070,13 +4018,12 @@ Returns: nothing
static void
show_controls(uint32_t controls, uint32_t controls2, const char *before)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
- ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
((controls & CTL_BINCODE) != 0)? " bincode" : "",
((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
@@ -4106,7 +4053,6 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
- ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
@@ -4190,14 +4136,12 @@ show_compile_extra_options(uint32_t options, const char *before,
const char *after)
{
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
-else fprintf(outfile, "%s%s%s%s%s%s%s%s",
+else fprintf(outfile, "%s%s%s%s%s%s",
before,
((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
- ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
- ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
after);
}
@@ -4213,13 +4157,11 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s",
static void
show_match_options(uint32_t options)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s",
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
- ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
- ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
@@ -4376,7 +4318,6 @@ static int
show_pattern_info(void)
{
uint32_t compile_options, overall_options, extra_options;
-BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
{
@@ -4466,7 +4407,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
!= 0)
return PR_ABEND;
- fprintf(outfile, "Capture group count = %d\n", capture_count);
+ fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);
if (backrefmax > 0)
fprintf(outfile, "Max back reference = %d\n", backrefmax);
@@ -4485,60 +4426,14 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (namecount > 0)
{
- fprintf(outfile, "Named capture groups:\n");
+ fprintf(outfile, "Named capturing subpatterns:\n");
for (; namecount > 0; namecount--)
{
int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
fprintf(outfile, " ");
-
- /* In UTF mode the name may be a UTF string containing non-ASCII
- letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
- use the normal string printing functions, which use escapes for all
- non-ASCII characters. */
-
- if (utf)
- {
-#ifdef SUPPORT_PCRE2_32
- if (test_mode == PCRE32_MODE)
- {
- PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
- while (*nameptr != 0)
- {
- uint8_t u8buff[6];
- int len = ord2utf8(*nameptr++, u8buff);
- fprintf(outfile, "%.*s", len, u8buff);
- }
- }
-#endif
-#ifdef SUPPORT_PCRE2_16
- if (test_mode == PCRE16_MODE)
- {
- PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
- while (*nameptr != 0)
- {
- int len;
- uint8_t u8buff[6];
- uint32_t c = *nameptr++ & 0xffff;
- if (c >= 0xD800 && c < 0xDC00)
- c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
- len = ord2utf8(c, u8buff);
- fprintf(outfile, "%.*s", len, u8buff);
- }
- }
-#endif
-#ifdef SUPPORT_PCRE2_8
- if (test_mode == PCRE8_MODE)
- fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
-#endif
- }
- else /* Not UTF mode */
- {
- PCHARSV(nametable, imm2_size, length, FALSE, outfile);
- }
-
+ PCHARSV(nametable, imm2_size, length, FALSE, outfile);
while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
-
#ifdef SUPPORT_PCRE2_32
if (test_mode == PCRE32_MODE)
fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
@@ -4552,7 +4447,6 @@ if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "%3d\n", (int)(
((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
#endif
-
nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
}
}
@@ -4659,7 +4553,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "Starting code units: ");
for (i = 0; i < 256; i++)
{
- if ((start_bits[i/8] & (1u << (i&7))) != 0)
+ if ((start_bits[i/8] & (1<<(i&7))) != 0)
{
if (c > 75)
{
@@ -5823,11 +5717,6 @@ if (forbid_utf != 0)
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
return PR_ABEND;
-/* Remember the number of captures. */
-
-if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
- return PR_ABEND;
-
/* If an explicit newline modifier was given, set the information flag in the
pattern so that it is preserved over push/pop. */
@@ -5998,58 +5887,6 @@ return capcount;
/*************************************************
-* Substitute callout function *
-*************************************************/
-
-/* Called from pcre2_substitute() when the substitute_callout modifier is set.
-Print out the data that is passed back. The substitute callout block is
-identical for all code unit widths, so we just pick one.
-
-Arguments:
- scb pointer to substitute callout block
- data_ptr callout data
-
-Returns: nothing
-*/
-
-static int
-substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
- void *data_ptr)
-{
-int yield = 0;
-BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
-(void)data_ptr; /* Not used */
-
-fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
- scb->subscount, scb->oveccount,
- SIZ_CAST scb->ovector[0], SIZ_CAST scb->ovector[1]);
-
-PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
- utf, outfile);
-
-fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
- SIZ_CAST scb->output_offsets[0], SIZ_CAST scb->output_offsets[1]);
-
-PCHARSV(scb->output, scb->output_offsets[0],
- scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
-
-if (scb->subscount == dat_datctl.substitute_stop)
- {
- yield = -1;
- fprintf(outfile, " STOPPED");
- }
-else if (scb->subscount == dat_datctl.substitute_skip)
- {
- yield = +1;
- fprintf(outfile, " SKIPPED");
- }
-
-fprintf(outfile, "\"\n");
-return yield;
-}
-
-
-/*************************************************
* Callout function *
*************************************************/
@@ -6060,11 +5897,8 @@ callout block for different code unit widths are that the pointers to the
subject, the most recent MARK, and a callout argument string point to strings
of the appropriate width. Casts can be used to deal with this.
-Arguments:
- cb a pointer to a callout block
- callout_data_ptr the provided callout data
-
-Returns: 0 or 1 or an error, as determined by settings
+Argument: a pointer to a callout block
+Return:
*/
static int
@@ -6484,42 +6318,6 @@ return TRUE;
/*************************************************
-* Show an entire ovector *
-*************************************************/
-
-/* This function is called after partial matching or match failure, when the
-"allvector" modifier is set. It is a means of checking the contents of the
-entire ovector, to ensure no modification of fields that should be unchanged.
-
-Arguments:
- ovector points to the ovector
- oveccount number of pairs
-
-Returns: nothing
-*/
-
-static void
-show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
-{
-uint32_t i;
-for (i = 0; i < 2*oveccount; i += 2)
- {
- PCRE2_SIZE start = ovector[i];
- PCRE2_SIZE end = ovector[i+1];
-
- fprintf(outfile, "%2d: ", i/2);
- if (start == PCRE2_UNSET && end == PCRE2_UNSET)
- fprintf(outfile, "<unset>\n");
- else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
- fprintf(outfile, "<unchanged>\n");
- else
- fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
- (unsigned long int)end);
- }
-}
-
-
-/*************************************************
* Process a data line *
*************************************************/
@@ -6544,10 +6342,7 @@ size_t needlen;
void *use_dat_context;
BOOL utf;
BOOL subject_literal;
-
-PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[3];
-uint32_t oveccount;
#ifdef SUPPORT_PCRE2_8
uint8_t *q8 = NULL;
@@ -6574,11 +6369,6 @@ dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
-if (dat_datctl.substitute_skip == 0)
- dat_datctl.substitute_skip = pat_patctl.substitute_skip;
-if (dat_datctl.substitute_stop == 0)
- dat_datctl.substitute_stop = pat_patctl.substitute_stop;
-
/* Initialize for scanning the data line. */
#ifdef SUPPORT_PCRE2_8
@@ -6861,7 +6651,7 @@ while ((c = *p++) != 0)
fprintf(outfile, "** Truncation will probably give the wrong "
"result.\n");
}
- *q8++ = (uint8_t)c;
+ *q8++ = c;
}
}
#endif
@@ -6895,7 +6685,7 @@ while ((c = *p++) != 0)
"result.\n");
}
- *q16++ = (uint16_t)c;
+ *q16++ = c;
}
}
#endif
@@ -6918,11 +6708,6 @@ arg_ulen = ulen; /* Value to use in match arg */
if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
return PR_OK;
-/* Setting substitute_{skip,fail} implies a substitute callout. */
-
-if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
- dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
-
/* Check for mutually exclusive modifiers. At present, these are all in the
first control word. */
@@ -6937,25 +6722,11 @@ for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
}
}
-if (pat_patctl.replacement[0] != 0)
+if (pat_patctl.replacement[0] != 0 &&
+ (dat_datctl.control & CTL_NULLCONTEXT) != 0)
{
- if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
- (dat_datctl.control & CTL_NULLCONTEXT) != 0)
- {
- fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
- return PR_OK;
- }
-
- if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
- fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
- }
-
-/* Warn for modifiers that are ignored for DFA. */
-
-if ((dat_datctl.control & CTL_DFA) != 0)
- {
- if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
- fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
+ fprintf(outfile, "** Replacement text is not supported with null_context.\n");
+ return PR_OK;
}
/* We now have the subject in dbuffer, with len containing the byte length, and
@@ -7184,9 +6955,6 @@ if (CASTVAR(void *, match_data) == NULL)
return PR_OK;
}
-ovector = FLD(match_data, ovector);
-PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
-
/* Replacement processing is ignored for DFA matching. */
if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
@@ -7206,7 +6974,7 @@ if (dat_datctl.replacement[0] != 0)
uint8_t rbuffer[REPLACE_BUFFSIZE];
uint8_t nbuffer[REPLACE_BUFFSIZE];
uint32_t xoptions;
- PCRE2_SIZE j, rlen, nsize, erroroffset;
+ PCRE2_SIZE rlen, nsize, erroroffset;
BOOL badutf = FALSE;
#ifdef SUPPORT_PCRE2_8
@@ -7219,11 +6987,6 @@ if (dat_datctl.replacement[0] != 0)
uint32_t *r32 = NULL;
#endif
- /* Fill the ovector with junk to detect elements that do not get set
- when they should be (relevant only when "allvector" is specified). */
-
- for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
-
if (timeitm)
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
@@ -7326,18 +7089,8 @@ if (dat_datctl.replacement[0] != 0)
rlen = PCRE2_ZERO_TERMINATED;
else
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
-
- if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
- {
- PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
- }
- else
- {
- PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
- }
-
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
- dat_datctl.options|xoptions, match_data, use_dat_context,
+ dat_datctl.options|xoptions, match_data, dat_context,
rbuffer, rlen, nbuffer, &nsize);
if (rc < 0)
@@ -7359,12 +7112,6 @@ if (dat_datctl.replacement[0] != 0)
fprintf(outfile, "\n");
show_memory = FALSE;
-
- /* Show final ovector contents if requested. */
-
- if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
- show_ovector(ovector, oveccount);
-
return PR_OK;
} /* End of substitution handling */
@@ -7378,11 +7125,14 @@ for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE j;
int capcount;
+ PCRE2_SIZE *ovector;
+
+ ovector = FLD(match_data, ovector);
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
- for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
+ for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the
stack callback function. */
@@ -7530,8 +7280,12 @@ for (gmatched = 0;; gmatched++)
if (capcount >= 0)
{
int i;
+ uint32_t oveccount;
- if (capcount > (int)oveccount) /* Check for lunatic return value */
+ /* This is a check against a lunatic return value. */
+
+ PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
+ if (capcount > (int)oveccount)
{
fprintf(outfile,
"** PCRE2 error: returned count %d is too big for ovector count %d\n",
@@ -7544,25 +7298,6 @@ for (gmatched = 0;; gmatched++)
}
}
- /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
- should be, but not for fast JIT, where it isn't supported. */
-
- if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
- (pat_patctl.control & CTL_JITFAST) == 0)
- {
- if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
- fprintf(outfile,
- "** PCRE2 error: flag not set after copy_matched_subject\n");
-
- if (CASTFLD(void *, match_data, subject) == pp)
- fprintf(outfile,
- "** PCRE2 error: copy_matched_subject has not copied\n");
-
- if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
- fprintf(outfile,
- "** PCRE2 error: copy_matched_subject mismatch\n");
- }
-
/* If this is not the first time round a global loop, check that the
returned string has changed. If it has not, check for an empty string match
at different starting offset from the previous match. This is a failed test
@@ -7590,18 +7325,24 @@ for (gmatched = 0;; gmatched++)
/* "allcaptures" requests showing of all captures in the pattern, to check
unset ones at the end. It may be set on the pattern or the data. Implement
by setting capcount to the maximum. This is not relevant for DFA matching,
- so ignore it (warning given above). */
+ so ignore it. */
- if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
+ if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
{
- capcount = maxcapcount + 1; /* Allow for full match */
- if (capcount > (int)oveccount) capcount = oveccount;
+ uint32_t maxcapcount;
+ if ((dat_datctl.control & CTL_DFA) != 0)
+ {
+ fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
+ }
+ else
+ {
+ if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
+ return PR_SKIP;
+ capcount = maxcapcount + 1; /* Allow for full match */
+ if (capcount > (int)oveccount) capcount = oveccount;
+ }
}
- /* "allvector" request showing the entire ovector. */
-
- if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
-
/* Output the captured substrings. Note that, for the matched string,
the use of \K in an assertion can make the start later than the end. */
@@ -7623,26 +7364,19 @@ for (gmatched = 0;; gmatched++)
/* Check for an unset group */
- if (start == PCRE2_UNSET && end == PCRE2_UNSET)
+ if (start == PCRE2_UNSET)
{
fprintf(outfile, "<unset>\n");
continue;
}
/* Check for silly offsets, in particular, values that have not been
- set when they should have been. However, if we are past the end of the
- captures for this pattern ("allvector" causes this), or if we are DFA
- matching, it isn't an error if the entry is unchanged. */
+ set when they should have been. */
if (start > ulen || end > ulen)
{
- if (((dat_datctl.control & CTL_DFA) != 0 ||
- i >= (int)(2*maxcapcount + 2)) &&
- start == JUNK_OFFSET && end == JUNK_OFFSET)
- fprintf(outfile, "<unchanged>\n");
- else
- fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
- (unsigned long int)start, (unsigned long int)end);
+ fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
+ (unsigned long int)start, (unsigned long int)end);
continue;
}
@@ -7783,19 +7517,10 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, "\n");
}
- if (ulen != ovector[1])
- fprintf(outfile, "** ovector[1] is not equal to the subject length: "
- "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
-
/* Process copy/get strings */
if (!copy_and_get(utf, 1)) return PR_ABEND;
- /* "allvector" outputs the entire vector */
-
- if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
- show_ovector(ovector, oveccount);
-
break; /* Out of the /g loop */
} /* End of handling partial match */
@@ -7865,11 +7590,6 @@ for (gmatched = 0;; gmatched++)
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
-
- /* "allvector" outputs the entire vector */
-
- if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
- show_ovector(ovector, oveccount);
}
break;
@@ -8181,13 +7901,9 @@ if (arg != NULL && arg[0] != CHAR_MINUS)
break;
}
-/* For VMS, return the value by setting a symbol, for certain values only. This
-is contributed code which the PCRE2 developers have no means of testing. */
+/* For VMS, return the value by setting a symbol, for certain values only. */
#ifdef __VMS
-
-/* This is the original code provided by the first VMS contributor. */
-#ifdef NEVER
if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
{
char ucname[16];
@@ -8197,22 +7913,6 @@ is contributed code which the PCRE2 developers have no means of testing. */
}
#endif
-/* This is the new code, provided by a second VMS contributor. */
-
- if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
- {
- char nam_buf[22], val_buf[4];
- $DESCRIPTOR(nam, nam_buf);
- $DESCRIPTOR(val, val_buf);
-
- strcpy(nam_buf, coptlist[i].name);
- nam.dsc$w_length = strlen(nam_buf);
- sprintf(val_buf, "%d", yield);
- val.dsc$w_length = strlen(val_buf);
- lib$set_symbol(&nam, &val);
- }
-#endif /* __VMS */
-
return yield;
}
@@ -8271,7 +7971,7 @@ printf(" Internal link size = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
printf(" Parentheses nest limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
-printf(" Default heap limit = %d kibibytes\n", optval);
+printf(" Default heap limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
printf(" Default match limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
diff --git a/dist2/src/sljit/sljitConfigInternal.h b/dist2/src/sljit/sljitConfigInternal.h
index ba60311e..f5703e8e 100644
--- a/dist2/src/sljit/sljitConfigInternal.h
+++ b/dist2/src/sljit/sljitConfigInternal.h
@@ -530,7 +530,7 @@ typedef double sljit_f64;
#endif /* !SLJIT_FUNC */
#ifndef SLJIT_INDIRECT_CALL
-#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \
+#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \
|| ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
/* It seems certain ppc compilers use an indirect addressing for functions
which makes things complicated. */
diff --git a/dist2/src/sljit/sljitExecAllocator.c b/dist2/src/sljit/sljitExecAllocator.c
index 3b37a975..7c185786 100644
--- a/dist2/src/sljit/sljitExecAllocator.c
+++ b/dist2/src/sljit/sljitExecAllocator.c
@@ -94,46 +94,6 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
#else
-#ifdef __APPLE__
-/* Configures TARGET_OS_OSX when appropriate */
-#include <TargetConditionals.h>
-
-#if TARGET_OS_OSX && defined(MAP_JIT)
-#include <sys/utsname.h>
-#endif /* TARGET_OS_OSX && MAP_JIT */
-
-#ifdef MAP_JIT
-
-static SLJIT_INLINE int get_map_jit_flag()
-{
-#if TARGET_OS_OSX
- /* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a version
- of macOS where it's OK to have more than one JIT block. On non-macOS systems, returns
- MAP_JIT if it is defined. */
- static int map_jit_flag = -1;
-
- /* The following code is thread safe because multiple initialization
- sets map_jit_flag to the same value and the code has no side-effects.
- Changing the kernel version witout system restart is (very) unlikely. */
- if (map_jit_flag == -1) {
- struct utsname name;
-
- uname(&name);
-
- /* Kernel version for 10.14.0 (Mojave) */
- map_jit_flag = (atoi(name.release) >= 18) ? MAP_JIT : 0;
- }
-
- return map_jit_flag;
-#else /* !TARGET_OS_OSX */
- return MAP_JIT;
-#endif /* TARGET_OS_OSX */
-}
-
-#endif /* MAP_JIT */
-
-#endif /* __APPLE__ */
-
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
{
void *retval;
@@ -143,17 +103,17 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
int flags = MAP_PRIVATE | MAP_ANON;
#ifdef MAP_JIT
- flags |= get_map_jit_flag();
+ flags |= MAP_JIT;
#endif
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
-#else /* !MAP_ANON */
+#else
if (dev_zero < 0) {
if (open_dev_zero())
return NULL;
}
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
-#endif /* MAP_ANON */
+#endif
return (retval != MAP_FAILED) ? retval : NULL;
}
diff --git a/dist2/src/sljit/sljitLir.c b/dist2/src/sljit/sljitLir.c
index ded9541b..5bdddc10 100644
--- a/dist2/src/sljit/sljitLir.c
+++ b/dist2/src/sljit/sljitLir.c
@@ -201,16 +201,15 @@
# define IS_CALL 0x010
# define IS_BIT26_COND 0x020
# define IS_BIT16_COND 0x040
-# define IS_BIT23_COND 0x080
-# define IS_COND (IS_BIT26_COND | IS_BIT16_COND | IS_BIT23_COND)
+# define IS_COND (IS_BIT26_COND | IS_BIT16_COND)
-# define PATCH_B 0x100
-# define PATCH_J 0x200
+# define PATCH_B 0x080
+# define PATCH_J 0x100
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
-# define PATCH_ABS32 0x400
-# define PATCH_ABS48 0x800
+# define PATCH_ABS32 0x200
+# define PATCH_ABS48 0x400
#endif
/* instruction types */
diff --git a/dist2/src/sljit/sljitNativeARM_64.c b/dist2/src/sljit/sljitNativeARM_64.c
index b015695c..27af7414 100644
--- a/dist2/src/sljit/sljitNativeARM_64.c
+++ b/dist2/src/sljit/sljitNativeARM_64.c
@@ -51,7 +51,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
0, 0, 1, 2, 3, 4, 5, 6, 7
};
-#define W_OP (1u << 31)
+#define W_OP (1 << 31)
#define RD(rd) (reg_map[rd])
#define RT(rt) (reg_map[rt])
#define RN(rn) (reg_map[rn] << 5)
@@ -560,7 +560,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
/* dst must be register, TMP_REG1
arg1 must be register, TMP_REG1, imm
arg2 must be register, TMP_REG2, imm */
- sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0;
+ sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
sljit_ins inst_bits;
sljit_s32 op = (flags & 0xffff);
sljit_s32 reg;
@@ -710,7 +710,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
case SLJIT_MOV_U8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10));
+ return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_S8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (!(flags & INT_OP))
@@ -718,7 +718,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_U16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10));
+ return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV_S16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (!(flags & INT_OP))
@@ -728,7 +728,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if ((flags & INT_OP) && dst == arg2)
return SLJIT_SUCCESS;
- return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+ return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
case SLJIT_MOV_S32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if ((flags & INT_OP) && dst == arg2)
@@ -1080,7 +1080,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
- sljit_ins inv_bits = (op & SLJIT_I32_OP) ? W_OP : 0;
+ sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0;
CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op));
@@ -1360,7 +1360,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
- inv_bits |= W_OP;
+ inv_bits |= (1 << 31);
if (src & SLJIT_MEM) {
emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
@@ -1382,7 +1382,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- inv_bits |= W_OP;
+ inv_bits |= (1 << 31);
if (src & SLJIT_MEM) {
emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
@@ -1662,7 +1662,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi
sljit_s32 src, sljit_sw srcw)
{
struct sljit_jump *jump;
- sljit_ins inv_bits = (type & SLJIT_I32_OP) ? W_OP : 0;
+ sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0;
SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
ADJUST_LOCAL_OFFSET(src, srcw);
@@ -1787,7 +1787,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
sljit_s32 dst_reg,
sljit_s32 src, sljit_sw srcw)
{
- sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? W_OP : 0;
+ sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? (1 << 31) : 0;
sljit_ins cc;
CHECK_ERROR();
diff --git a/dist2/src/sljit/sljitNativeMIPS_32.c b/dist2/src/sljit/sljitNativeMIPS_32.c
index ad970bf2..094c9923 100644
--- a/dist2/src/sljit/sljitNativeMIPS_32.c
+++ b/dist2/src/sljit/sljitNativeMIPS_32.c
@@ -368,21 +368,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) || (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
-#else /* !SLJIT_MIPS_R1 && !SLJIT_MIPS_R6 */
+#else
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_R1 || SLJIT_MIPS_R6 */
+#endif
}
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
- FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)));
- FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-#else /* !SLJIT_MIPS_R6 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-#endif /* SLJIT_MIPS_R6 */
FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
diff --git a/dist2/src/sljit/sljitNativeMIPS_64.c b/dist2/src/sljit/sljitNativeMIPS_64.c
index a6a2bcc0..f841aef5 100644
--- a/dist2/src/sljit/sljitNativeMIPS_64.c
+++ b/dist2/src/sljit/sljitNativeMIPS_64.c
@@ -459,26 +459,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
- return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
-#elif (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
if (op & SLJIT_I32_OP)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
-#else /* !SLJIT_MIPS_R6 && !SLJIT_MIPS_R1 */
+#else
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_R6 */
+#endif
}
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
- FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)));
- FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-#else /* !SLJIT_MIPS_R6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-#endif /* SLJIT_MIPS_R6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
diff --git a/dist2/src/sljit/sljitNativeMIPS_common.c b/dist2/src/sljit/sljitNativeMIPS_common.c
index e0d6a3f0..894e2130 100644
--- a/dist2/src/sljit/sljitNativeMIPS_common.c
+++ b/dist2/src/sljit/sljitNativeMIPS_common.c
@@ -27,31 +27,17 @@
/* Latest MIPS architecture. */
/* Automatically detect SLJIT_MIPS_R1 */
-#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6)
-#define SLJIT_MIPS_R6 1
-#endif
-
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- return "MIPS32-R6" SLJIT_CPUINFO;
-#else /* !SLJIT_CONFIG_MIPS_32 */
- return "MIPS64-R6" SLJIT_CPUINFO;
-#endif /* SLJIT_CONFIG_MIPS_32 */
-
-#elif (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
-
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
return "MIPS32-R1" SLJIT_CPUINFO;
-#else /* !SLJIT_CONFIG_MIPS_32 */
+#else
return "MIPS64-R1" SLJIT_CPUINFO;
-#endif /* SLJIT_CONFIG_MIPS_32 */
-
+#endif
#else /* SLJIT_MIPS_R1 */
return "MIPS III" SLJIT_CPUINFO;
-#endif /* SLJIT_MIPS_R6 */
+#endif
}
/* Length of an instruction word
@@ -76,7 +62,6 @@ typedef sljit_u32 sljit_ins;
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
-#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
@@ -84,14 +69,14 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
- 0, 0, 14, 2, 4, 6, 8, 12, 10, 16
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 14, 2, 4, 6, 8, 12, 10
};
#else
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
- 0, 0, 13, 14, 15, 16, 17, 12, 18, 10
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 13, 14, 15, 16, 17, 12, 18
};
#endif
@@ -117,11 +102,6 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define FR(dr) (freg_map[dr])
#define HI(opcode) ((opcode) << 26)
#define LO(opcode) (opcode)
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-/* CMP.cond.fmt */
-/* S = (20 << 21) D = (21 << 21) */
-#define CMP_FMT_S (20 << 21)
-#endif /* SLJIT_MIPS_R6 */
/* S = (16 << 21) D = (17 << 21) */
#define FMT_S (16 << 21)
#define FMT_D (17 << 21)
@@ -134,13 +114,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define ANDI (HI(12))
#define B (HI(4))
#define BAL (HI(1) | (17 << 16))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define BC1EQZ (HI(17) | (9 << 21) | FT(TMP_FREG3))
-#define BC1NEZ (HI(17) | (13 << 21) | FT(TMP_FREG3))
-#else /* !SLJIT_MIPS_R6 */
#define BC1F (HI(17) | (8 << 21))
#define BC1T (HI(17) | (8 << 21) | (1 << 16))
-#endif /* SLJIT_MIPS_R6 */
#define BEQ (HI(4))
#define BGEZ (HI(1) | (1 << 16))
#define BGTZ (HI(7))
@@ -149,42 +124,20 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define BNE (HI(5))
#define BREAK (HI(0) | LO(13))
#define CFC1 (HI(17) | (2 << 21))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define C_UEQ_S (HI(17) | CMP_FMT_S | LO(3))
-#define C_ULE_S (HI(17) | CMP_FMT_S | LO(7))
-#define C_ULT_S (HI(17) | CMP_FMT_S | LO(5))
-#define C_UN_S (HI(17) | CMP_FMT_S | LO(1))
-#define C_FD (FD(TMP_FREG3))
-#else /* !SLJIT_MIPS_R6 */
+#define C_UN_S (HI(17) | FMT_S | LO(49))
#define C_UEQ_S (HI(17) | FMT_S | LO(51))
#define C_ULE_S (HI(17) | FMT_S | LO(55))
#define C_ULT_S (HI(17) | FMT_S | LO(53))
-#define C_UN_S (HI(17) | FMT_S | LO(49))
-#define C_FD (0)
-#endif /* SLJIT_MIPS_R6 */
#define CVT_S_S (HI(17) | FMT_S | LO(32))
#define DADDIU (HI(25))
#define DADDU (HI(0) | LO(45))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define DDIV (HI(0) | (2 << 6) | LO(30))
-#define DDIVU (HI(0) | (2 << 6) | LO(31))
-#define DMOD (HI(0) | (3 << 6) | LO(30))
-#define DMODU (HI(0) | (3 << 6) | LO(31))
-#define DIV (HI(0) | (2 << 6) | LO(26))
-#define DIVU (HI(0) | (2 << 6) | LO(27))
-#define DMUH (HI(0) | (3 << 6) | LO(28))
-#define DMUHU (HI(0) | (3 << 6) | LO(29))
-#define DMUL (HI(0) | (2 << 6) | LO(28))
-#define DMULU (HI(0) | (2 << 6) | LO(29))
-#else /* !SLJIT_MIPS_R6 */
#define DDIV (HI(0) | LO(30))
#define DDIVU (HI(0) | LO(31))
#define DIV (HI(0) | LO(26))
#define DIVU (HI(0) | LO(27))
+#define DIV_S (HI(17) | FMT_S | LO(3))
#define DMULT (HI(0) | LO(28))
#define DMULTU (HI(0) | LO(29))
-#endif /* SLJIT_MIPS_R6 */
-#define DIV_S (HI(17) | FMT_S | LO(3))
#define DSLL (HI(0) | LO(56))
#define DSLL32 (HI(0) | LO(60))
#define DSLLV (HI(0) | LO(20))
@@ -198,34 +151,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define J (HI(2))
#define JAL (HI(3))
#define JALR (HI(0) | LO(9))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define JR (HI(0) | LO(9))
-#else /* !SLJIT_MIPS_R6 */
#define JR (HI(0) | LO(8))
-#endif /* SLJIT_MIPS_R6 */
#define LD (HI(55))
#define LUI (HI(15))
#define LW (HI(35))
#define MFC1 (HI(17))
-#if !(defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
#define MFHI (HI(0) | LO(16))
#define MFLO (HI(0) | LO(18))
-#else /* SLJIT_MIPS_R6 */
-#define MOD (HI(0) | (3 << 6) | LO(26))
-#define MODU (HI(0) | (3 << 6) | LO(27))
-#endif /* !SLJIT_MIPS_R6 */
#define MOV_S (HI(17) | FMT_S | LO(6))
#define MTC1 (HI(17) | (4 << 21))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define MUH (HI(0) | (3 << 6) | LO(24))
-#define MUHU (HI(0) | (3 << 6) | LO(25))
-#define MUL (HI(0) | (2 << 6) | LO(24))
-#define MULU (HI(0) | (2 << 6) | LO(25))
-#else /* !SLJIT_MIPS_R6 */
+#define MUL_S (HI(17) | FMT_S | LO(2))
#define MULT (HI(0) | LO(24))
#define MULTU (HI(0) | LO(25))
-#endif /* SLJIT_MIPS_R6 */
-#define MUL_S (HI(17) | FMT_S | LO(2))
#define NEG_S (HI(17) | FMT_S | LO(7))
#define NOP (HI(0) | LO(0))
#define NOR (HI(0) | LO(39))
@@ -251,18 +188,14 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define XOR (HI(0) | LO(38))
#define XORI (HI(14))
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) || (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
#define CLZ (HI(28) | LO(32))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define DCLZ (LO(18))
-#else /* !SLJIT_MIPS_R6 */
#define DCLZ (HI(28) | LO(36))
#define MOVF (HI(0) | (0 << 16) | LO(1))
#define MOVN (HI(0) | LO(11))
#define MOVT (HI(0) | (1 << 16) | LO(1))
#define MOVZ (HI(0) | LO(10))
#define MUL (HI(28) | LO(2))
-#endif /* SLJIT_MIPS_R6 */
#define PREF (HI(51))
#define PREFX (HI(19) | LO(15))
#define SEB (HI(31) | (16 << 6) | LO(32))
@@ -301,13 +234,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit
static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags)
{
- if (flags & IS_BIT26_COND)
- return (1 << 26);
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
- if (flags & IS_BIT23_COND)
- return (1 << 23);
-#endif /* SLJIT_MIPS_R6 */
- return (1 << 16);
+ return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16);
}
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
@@ -1148,62 +1075,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return push_inst(compiler, NOP, UNMOVABLE_INS);
case SLJIT_LMUL_UW:
case SLJIT_LMUL_SW:
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULU : DMUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
- FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMUHU : DMUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
-#else /* !SLJIT_CONFIG_MIPS_64 */
- FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULU : MUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
- FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MUHU : MUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
-#endif /* SLJIT_CONFIG_MIPS_64 */
- FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0)));
- return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1));
-#else /* !SLJIT_MIPS_R6 */
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#else /* !SLJIT_CONFIG_MIPS_64 */
+#else
FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#endif /* SLJIT_CONFIG_MIPS_64 */
+#endif
FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
-#endif /* SLJIT_MIPS_R6 */
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (int_op) {
- FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
- FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
- }
- else {
- FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
- FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DMODU : DMOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
- }
-#else /* !SLJIT_CONFIG_MIPS_64 */
- FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
- FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
-#endif /* SLJIT_CONFIG_MIPS_64 */
- FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0)));
- return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1));
-#else /* !SLJIT_MIPS_R6 */
#if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* !SLJIT_MIPS_R1 */
+#endif
+
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (int_op)
FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
else
FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#else /* !SLJIT_CONFIG_MIPS_64 */
+#else
FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#endif /* SLJIT_CONFIG_MIPS_64 */
+#endif
+
FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
-#endif /* SLJIT_MIPS_R6 */
}
return SLJIT_SUCCESS;
@@ -1509,7 +1408,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
inst = C_UN_S;
break;
}
- return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1) | C_FD, UNMOVABLE_INS);
+
+ return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1708,30 +1608,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
flags = IS_BIT26_COND; \
delay_check = src;
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-
-#define BR_T() \
- inst = BC1NEZ; \
- flags = IS_BIT23_COND; \
- delay_check = FCSR_FCC;
-#define BR_F() \
- inst = BC1EQZ; \
- flags = IS_BIT23_COND; \
- delay_check = FCSR_FCC;
-
-#else /* !SLJIT_MIPS_R6 */
-
#define BR_T() \
inst = BC1T | JUMP_LENGTH; \
flags = IS_BIT16_COND; \
delay_check = FCSR_FCC;
+
#define BR_F() \
inst = BC1F | JUMP_LENGTH; \
flags = IS_BIT16_COND; \
delay_check = FCSR_FCC;
-#endif /* SLJIT_MIPS_R6 */
-
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
struct sljit_jump *jump;
@@ -2041,11 +1927,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
case SLJIT_GREATER_EQUAL_F64:
case SLJIT_UNORDERED_F64:
case SLJIT_ORDERED_F64:
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
- FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar));
-#else /* !SLJIT_MIPS_R6 */
FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar));
-#endif /* SLJIT_MIPS_R6 */
FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar));
FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar));
src_ar = dst_ar;
diff --git a/dist2/src/sljit/sljitNativePPC_common.c b/dist2/src/sljit/sljitNativePPC_common.c
index b34e3965..5ef4ac96 100644
--- a/dist2/src/sljit/sljitNativePPC_common.c
+++ b/dist2/src/sljit/sljitNativePPC_common.c
@@ -42,7 +42,7 @@ typedef sljit_u32 sljit_ins;
#include <sys/cache.h>
#endif
-#if (defined _CALL_ELF && _CALL_ELF == 2)
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
#endif
diff --git a/dist2/test-driver b/dist2/test-driver
index b8521a48..0218a01f 100755
--- a/dist2/test-driver
+++ b/dist2/test-driver
@@ -1,9 +1,9 @@
#! /bin/sh
# test-driver - basic testsuite driver script.
-scriptversion=2018-03-07.03; # UTC
+scriptversion=2016-01-11.22; # UTC
-# Copyright (C) 2011-2018 Free Software Foundation, Inc.
+# Copyright (C) 2011-2017 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,7 +16,7 @@ scriptversion=2018-03-07.03; # UTC
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -140,7 +140,7 @@ echo ":copy-in-global-log: $gcopy" >> $trs_file
# Local Variables:
# mode: shell-script
# sh-indentation: 2
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
diff --git a/dist2/testdata/grepoutputCN b/dist2/testdata/grepoutputCN
deleted file mode 100644
index 5217b5a1..00000000
--- a/dist2/testdata/grepoutputCN
+++ /dev/null
@@ -1,30 +0,0 @@
-The quick brown
-This time it jumps and jumps and jumps.
-This line contains \E and (regex) *meta* [characters].
-The word is cat in this line
-The caterpillar sat on the mat
-The snowcat is not an animal
-The quick brown
-This time it jumps and jumps and jumps.
-This line contains \E and (regex) *meta* [characters].
-The word is cat in this line
-The caterpillar sat on the mat
-The snowcat is not an animal
-0:T
-The quick brown
-0:T
-This time it jumps and jumps and jumps.
-0:T
-This line contains \E and (regex) *meta* [characters].
-0:T
-The word is cat in this line
-0:T
-The caterpillar sat on the mat
-0:T
-The snowcat is not an animal
-T
-T
-T
-T
-T
-T
diff --git a/dist2/testdata/testinput1 b/dist2/testdata/testinput1
index 3fd5d2a6..d8615ee3 100644
--- a/dist2/testdata/testinput1
+++ b/dist2/testdata/testinput1
@@ -6263,84 +6263,4 @@ ef) x/x,mark
aBCDEF
AbCDe f
-/(*pla:foo).{6}/
- abcfoobarxyz
-\= Expect no match
- abcfooba
-
-/(*positive_lookahead:foo).{6}/
- abcfoobarxyz
-
-/(?(*pla:foo).{6}|a..)/
- foobarbaz
- abcfoobar
-
-/(?(*positive_lookahead:foo).{6}|a..)/
- foobarbaz
- abcfoobar
-
-/(*plb:foo)bar/
- abcfoobar
-\= Expect no match
- abcbarfoo
-
-/(*positive_lookbehind:foo)bar/
- abcfoobar
-\= Expect no match
- abcbarfoo
-
-/(?(*plb:foo)bar|baz)/
- abcfoobar
- bazfoobar
- abcbazfoobar
- foobazfoobar
-
-/(?(*positive_lookbehind:foo)bar|baz)/
- abcfoobar
- bazfoobar
- abcbazfoobar
- foobazfoobar
-
-/(*nlb:foo)bar/
- abcbarfoo
-\= Expect no match
- abcfoobar
-
-/(*negative_lookbehind:foo)bar/
- abcbarfoo
-\= Expect no match
- abcfoobar
-
-/(?(*nlb:foo)bar|baz)/
- abcfoobaz
- abcbarbaz
-\= Expect no match
- abcfoobar
-
-/(?(*negative_lookbehind:foo)bar|baz)/
- abcfoobaz
- abcbarbaz
-\= Expect no match
- abcfoobar
-
-/(*atomic:a+)\w/
- aaab
-\= Expect no match
- aaaa
-
-/ (?<word> \w+ )* \. /xi
- pokus.
-
-/(?(DEFINE) (?<word> \w+ ) ) (?&word)* \./xi
- pokus.
-
-/(?(DEFINE) (?<word> \w+ ) ) ( (?&word)* ) \./xi
- pokus.
-
-/(?&word)* (?(DEFINE) (?<word> \w+ ) ) \./xi
- pokus.
-
-/(?&word)* \. (?<word> \w+ )/xi
- pokus.hokus
-
# End of testinput1
diff --git a/dist2/testdata/testinput10 b/dist2/testdata/testinput10
index 4399f829..93d2560c 100644
--- a/dist2/testdata/testinput10
+++ b/dist2/testdata/testinput10
@@ -475,22 +475,5 @@
\x{100}
\= Expect no match
aaa
-
-# Offsets are different in 8-bit mode.
-
-/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
-
-# Check name length with non-ASCII characters
-
-/(?'ABáC678901234567890123456789012'...)/utf
-
-/(?'ABáC6789012345678901234567890123'...)/utf
-
-/(?'ABZC6789012345678901234567890123'...)/utf
-
-/(?(n/utf
-
-/(?(á/utf
# End of testinput10
diff --git a/dist2/testdata/testinput12 b/dist2/testdata/testinput12
index 466fb93c..b0ab9098 100644
--- a/dist2/testdata/testinput12
+++ b/dist2/testdata/testinput12
@@ -381,25 +381,5 @@
\x{100}
\= Expect no match
aaa
-
-# Offsets are different in 8-bit mode.
-
-/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
-
-# A few script run tests in non-UTF mode (but they need Unicode support)
-
-/^(*script_run:.{4})/
- \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
- \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
- \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
-
-/^(*sr:.*)/utf,allow_surrogate_escapes
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check
-
-/(?(n/utf
-
-/(?(á/utf
# End of testinput12
diff --git a/dist2/testdata/testinput17 b/dist2/testdata/testinput17
index 65bbbb94..09441518 100644
--- a/dist2/testdata/testinput17
+++ b/dist2/testdata/testinput17
@@ -299,9 +299,9 @@
# ----
/[aC]/mg,firstline,newline=lf
- match\nmatch
+match\nmatch
/[aCz]/mg,firstline,newline=lf
- match\nmatch
+match\nmatch
# End of testinput17
diff --git a/dist2/testdata/testinput2 b/dist2/testdata/testinput2
index 9e59b628..fc94b35f 100644
--- a/dist2/testdata/testinput2
+++ b/dist2/testdata/testinput2
@@ -2408,13 +2408,13 @@
\= Expect no match
cat
-/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames
+/(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames
cat
/TA]/
The ACTA] comes
-/TA]/allow_empty_class,match_unset_backref,dupnames
+/TA]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
The ACTA] comes
/(?2)[]a()b](abc)/
@@ -2446,25 +2446,25 @@
/a[^]b/
-/a[]b/allow_empty_class,match_unset_backref,dupnames
+/a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
\= Expect no match
ab
-/a[]+b/allow_empty_class,match_unset_backref,dupnames
+/a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
\= Expect no match
ab
-/a[]*+b/allow_empty_class,match_unset_backref,dupnames
+/a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
\= Expect no match
ab
-/a[^]b/allow_empty_class,match_unset_backref,dupnames
+/a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
aXb
a\nb
\= Expect no match
ab
-/a[^]+b/allow_empty_class,match_unset_backref,dupnames
+/a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
aXb
a\nX\nXb
\= Expect no match
@@ -2903,10 +2903,10 @@
xxxxabcde\=ps
xxxxabcde\=ph
-/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames
+/(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames
cat
-/(\3)(\1)(a)/I,allow_empty_class,match_unset_backref,dupnames
+/(\3)(\1)(a)/I,alt_bsux,allow_empty_class,match_unset_backref,dupnames
cat
/(\3)(\1)(a)/I
@@ -3418,14 +3418,6 @@
aU0041z
\= Expect no match
aAz
-
-/^\u{7a}/alt_bsux
- u{7a}
-\= Expect no match
- zoo
-
-/^\u{7a}/extra_alt_bsux
- zoo
/(?(?=c)c|d)++Y/B
@@ -5312,22 +5304,10 @@ a)"xI
/\N{\c/IB,bad_escape_is_literal
-/[\j\x{z}\o\gAb\g]/B,bad_escape_is_literal
+/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
/[Q-\N]/B,bad_escape_is_literal
-/[\s-_]/bad_escape_is_literal
-
-/[_-\s]/bad_escape_is_literal
-
-/[\B\R\X]/B
-
-/[\B\R\X]/B,bad_escape_is_literal
-
-/[A-\BP-\RV-\X]/B
-
-/[A-\BP-\RV-\X]/B,bad_escape_is_literal
-
# ----------------------------------------------------------------------
/a\b(c/literal
@@ -5525,66 +5505,4 @@ a)"xI
bbc
xbc
-/a(b)c|xyz/g,allvector,replace=<$0>
- abcdefabcpqr\=ovector=4
- abxyz\=ovector=4
- abcdefxyz\=ovector=4
-
-/a(b)c|xyz/allvector
- abcdef\=ovector=4
- abxyz\=ovector=4
-
-/a(b)c|xyz/g,replace=<$0>,substitute_callout
- abcdefabcpqr
- abxyzpqrabcxyz
- 12abc34xyz99abc55\=substitute_stop=2
- 12abc34xyz99abc55\=substitute_skip=1
- 12abc34xyz99abc55\=substitute_skip=2
-
-/a(b)c|xyz/g,replace=<$0>
- abcdefabcpqr
- abxyzpqrabcxyz
- 12abc34xyz\=substitute_stop=2
- 12abc34xyz\=substitute_skip=1
-
-/a(b)c|xyz/replace=<$0>
- abcdefabcpqr
- 12abc34xyz\=substitute_skip=1
- 12abc34xyz\=substitute_stop=1
-
-/abc\rdef/
- abc\ndef
-
-/abc\rdef\x{0d}xyz/escaped_cr_is_lf
- abc\ndef\rxyz
-\= Expect no match
- abc\ndef\nxyz
-
-/(?(*ACCEPT)xxx)/
-
-/(?(*atomic:xx)xxx)/
-
-/(?(*script_run:xxx)zzz)/
-
-/foobar/
- the foobar thing\=copy_matched_subject
- the foobar thing\=copy_matched_subject,zero_terminate
-
-/foobar/g
- the foobar thing foobar again\=copy_matched_subject
-
-/(*:XX)^abc/I
-
-/(*COMMIT:XX)^abc/I
-
-/(*ACCEPT:XX)^abc/I
-
-/abc/replace=xyz
- abc\=null_context
-
-/abc/replace=xyz,substitute_callout
- abc
-\= Expect error message
- abc\=null_context
-
# End of testinput2
diff --git a/dist2/testdata/testinput4 b/dist2/testdata/testinput4
index cccab0e5..a27b6afc 100644
--- a/dist2/testdata/testinput4
+++ b/dist2/testdata/testinput4
@@ -2317,167 +2317,5 @@
/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf
\x{99}\x{99}\x{99}
-
-# Script run tests
-
-/^(*script_run:.{4})/utf
- abcd Latin x4
- \x{2e80}\x{2fa1d}\x{3041}\x{30a1} Han Han Hiragana Katakana
- \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
- \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
- \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
- \x{2e80}\x{3105}\x{2e80}\x{3105} Han Bopomofo Han Bopomofo
- \x{02ea}\x{2e80}\x{2e80}\x{3105} Bopomofo-Sk Han Han Bopomofo
- \x{3105}\x{2e80}\x{2e80}\x{3105} Bopomofo Han Han Bopomofo
- \x{0300}cd! Inherited Latin Latin Common
- \x{0391}12\x{03a9} Greek Common-digits Greek
- \x{0400}12\x{fe2f} Cyrillic Common-digits Cyrillic
- \x{0531}12\x{fb17} Armenian Common-digits Armenian
- \x{0591}12\x{fb4f} Hebrew Common-digits Hebrew
- \x{0600}12\x{1eef1} Arabic Common-digits Arabic
- \x{0600}\x{0660}\x{0669}\x{1eef1} Arabic Arabic-digits Arabic
- \x{0700}12\x{086a} Syriac Common-digits Syriac
- \x{1200}12\x{ab2e} Ethiopic Common-digits Ethiopic
- \x{1680}12\x{169c} Ogham Common-digits Ogham
- \x{3041}12\x{3041} Hiragana Common-digits Hiragana
- \x{0980}\x{09e6}\x{09e7}\x{0993} Bengali Bengali-digits Bengali
- !cde Common Latin Latin Latin
- A..B Latin Common Common Latin
- 0abc Ascii-digit Latin Latin Latin
- 1\x{0700}\x{0700}\x{0700} Ascii-digit Syriac x 3
- \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters
-\= Expect no match
- a\x{370}bcd Latin Greek Latin Latin
- \x{1100}\x{02ea}\x{02ea}\x{02ea} Hangul Bopomofo x3
- \x{02ea}\x{02ea}\x{02ea}\x{1100} Bopomofo x3 Hangul
- \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
- \x{0391}\x{09e6}\x{09e7}\x{03a9} Greek Bengali digits Greek
- \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic
- \x{0600}\x{0669}7\x{1eef1} Arabic Arabic-digit ascii-digit Arabic
- A5\x{ff19}B Latin Common-ascii/notascii-digits Latin
- \x{0300}cd\x{0391} Inherited Latin Latin Greek
- !cd\x{0391} Common Latin Latin Greek
- \x{1A80}\x{1A90}\x{1a40}\x{1a41} Tai Tham Hora digit, Tham digit, letters
- A\x{1d7ce}\x{1d7ff}B Common fancy-common-2-sets-digits Common
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-
-/^(*sr:.{4}|..)/utf
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-
-/^(*atomic_script_run:.{4}|..)/utf
-\= Expect no match
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-
-/^(*asr:.*)/utf
-\= Expect no match
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-
-/^(?>(*sr:.*))/utf
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-
-/^(*sr:.*)/utf
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- \x{10fffd}\x{10fffd}\x{10fffd} Private use (Unknown)
-
-/^(*sr:\x{2e80}*)/utf
- \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo
-
-/^(*sr:\x{2e80}*)\x{2e80}/utf
- \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo
-
-/^(*sr:.*)Test/utf
- Test script run on an empty string
-
-/^(*sr:(.{2})){2}/utf
- \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic
- \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters
- \x{1A80}\x{1a40}\x{1A90}\x{1a41} Tai Tham Hora digit, letter, Tham digit, letter
-\= Expect no match
- \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
-
-/^(*sr:\S*)/utf
- \x{1cf4}\x{20f0}\x{900}\x{11305} [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Gran
- \x{1cf4}\x{20f0}\x{11305}\x{900} [Dev,Gran,Kan] [Dev,Gran,Lat] Gran Dev
- \x{1cf4}\x{20f0}\x{900}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Lat
- \x{1cf4}\x{20f0}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Lat
- \x{20f0}ABC [Dev,Gran,Lat] Lat
- XYZ\x{20f0}ABC Lat [Dev,Gran,Lat] Lat
- \x{a36}\x{a33}\x{900} [Dev,...] [Dev,...] Dev
- \x{3001}\x{2e80}\x{3041}\x{30a1} [Bopo, Han, etc] Han Hira Kata
- \x{3001}\x{30a1}\x{2e80}\x{3041} [Bopo, Han, etc] Kata Han Hira
- \x{3001}\x{3105}\x{2e80}\x{1101} [Bopo, Han, etc] Bopomofo Han Hangul
- \x{3105}\x{3001}\x{2e80}\x{1101} Bopomofo [Bopo, Han, etc] Han Hangul
- \x{3031}\x{3041}\x{30a1}\x{2e80} [Hira Kata] Hira Kata Han
- \x{060c}\x{06d4}\x{0600}\x{10d00}\x{0700} [Arab Rohg Syrc Thaa] [Arab Rohg] Arab Rohg Syrc
- \x{060c}\x{06d4}\x{0700}\x{0600}\x{10d00} [Arab Rohg Syrc Thaa] [Arab Rohg] Syrc Arab Rohg
- \x{2e80}\x{3041}\x{3001}\x{3031}\x{2e80} Han Hira [Bopo, Han, etc] [Hira Kata] Han
-
-/(?<!)(*sr:)/
-
-/(?<!X(*sr:B)C)/
-
-/(?<=abc(?=X(*sr:BCY)Z)XBCYZ)./
- abcXBCYZ!
-
-/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./
- abcXBXYCCC!
-
-/^(*sr:\S*)/utf
- \x{10d00}\x{10d00}\x{06d4} Rohingya Rohingya Arabic-full-stop
- \x{06d4}\x{10d00}\x{10d00} Arabic-full-stop Rohingya Rohingya
- \x{10d00}\x{10d00}\x{0363} Rohingya Rohingya Inherited-extend-Latin
- \x{0363}\x{10d00}\x{10d00} Inherited-extend-Latin Rohingya Rohingya
- AB\x{0363} Latin Latin Inherited-extend-Latin
- \x{0363}AB Inherited-extend-Latin Latin Latin
- AB\x{1cf7} Latin Latin Common-extended-Beng
- \x{1cf7}AB Common-extend-Beng Latin Latin
- \x{1cf7}\x{0993} Common-extend-Beng Bengali
- A\x{1abe}BC Test enclosing mark
- \x{0370}\x{1abe}\x{0371} Which can occur with any script (Greek here)
- \x{3001}\x{adf9}\x{3001} [.. Hangul ..] Hangul [.. Hangul ..]
- \x{3400}\x{3001}XXX Han [Han etc.]
- \x{3400}\x{1cd5} Han [Bengali Devanagari]
- \x{ac01}\x{3400} Hangul [.. Hangul ..]
- \x{ac01}\x{1cd5} Hangul [Bengali Devanagari]
- \x{102e0}\x{06d4}\x{1ee4d} [Arabic Coptic] [Arab Rohingya] Arabic
- \x{102e0}\x{06d4}\x{2cc9} [Arabic Coptic] [Arab Rohingya] Coptic
- \x{102e0}\x{06d4}\x{10d30} [Arabic Coptic] [Arab Rohingya] Rohingya
-
-# Test loop breaking for empty string match
-
-/^(*sr:A|)*BCD/utf
- AABCD
- ABCD
- BCD
-
-# The use of (*ACCEPT) breaks script run checking
-
-/^(*sr:.*(*ACCEPT)ZZ)/utf
- \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
-
-# -------
-
-# Test group names containing non-ASCII letters and digits
-
-/(?'ABáC'...)\g{ABáC}/utf
- abcabcdefg
-
-/(?'XÊ°ABC'...)/utf
- xyzpq
-
-/(?'X×ABC'...)/utf
- 12345
-
-/(?'XᾈABC'...)/utf
- %^&*(...
-
-/(?'ð¨ABC'...)/utf
- abcde
-
-/^(?'×ABC'...)(?&×ABC)(?P=×ABC)/utf
- 123123123456
-
-/^(?'×ABC'...)(?&×ABC)/utf
- 123123123456
# End of testinput4
diff --git a/dist2/testdata/testinput5 b/dist2/testdata/testinput5
index 7c581450..687de321 100644
--- a/dist2/testdata/testinput5
+++ b/dist2/testdata/testinput5
@@ -333,13 +333,13 @@
/[[:a\x{100}b:]]/utf
-/a[^]b/utf,allow_empty_class,match_unset_backref
+/a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref
a\x{1234}b
a\nb
\= Expect no match
ab
-/a[^]+b/utf,allow_empty_class,match_unset_backref
+/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref
aXb
a\nX\nX\x{1234}b
\= Expect no match
@@ -814,12 +814,6 @@
/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
-/^\u{0000000000010ffff}/utf,extra_alt_bsux
- \x{10ffff}
-
-/\u/utf,alt_bsux
- \\u
-
/^a+[a\x{200}]/B,utf
aa
@@ -2112,66 +2106,5 @@
/(*: A‎B
C)abc/x,utf,mark,alt_verbnames
abc
-
-# Script run tests: auto-possessification
-
-/^(*sr:.*)/B,utf
- paypаl.com A classic example of why script run checks are a good thing
-
-/^(*sr:.*(*ACCEPT))/utf
- paypаl.com But *ACCEPT breaks things
-
-/^(*sr:\x{2e80}*)/B,utf
-
-/^(*sr:\x{2e80}*)\x{2e80}/B,utf
-
-/(?<!)(*sr:)/B
-
-/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
- abcXBXYCCC!
-
-# Some script run patterns are broken in Perl 5.28.0. These can be moved into
-# test 4 when a mended version of Perl is released.
-
-/^(*sr:.{4})/utf
- \x{0980}12\x{0993} Bengali Common-digits Bengali
- \x{0780}12\x{07b1} Thaana Common-digits Thaana
- \x{0e01}12\x{0e5b} Thai Common-digits Thai
- \x{1780}12\x{19ff} Khmer Common-digits Khmer
- \x{0904}12\x{0939} Devanagari Common-digits Devanagari
- A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
- A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
-
-# These ones involve non-ASCII but nevertheless Common digits. As of October
-# 2018 even blead Perl wasn't handling all of these - but is going to.
-
-/^(*sr:.{4})/utf
- A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
- \x{ff10}\x{ff19}.. Common-notascii-digits Common Common
- A\x{ff10}BC Latin Common-notascii-digit Latin Latin
- A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
- \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
- A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
-
-# -------
-
-# Test reference and errors in non-ASCII characters in group names
-
-/(?'ð‘ …ABC'...)/I,utf
- abcde\=copy=ð‘ …ABC
-
-# Bad ones
-
-/(?'ABáŒC'...)\g{ABáŒC}/utf
-
-/(?'Ù ABC'...)/utf
-
-/(?'²ABC'...)/utf
-
-/(?'X²ABC'...)/utf
-
-# -------
-
-/\p{Any}*xyz/I
# End of testinput5
diff --git a/dist2/testdata/testinput6 b/dist2/testdata/testinput6
index 15dfb74b..f7dedb21 100644
--- a/dist2/testdata/testinput6
+++ b/dist2/testdata/testinput6
@@ -4955,21 +4955,4 @@
\= Expect no match
\na
-/foobar/
- the foobar thing\=copy_matched_subject
- the foobar thing\=copy_matched_subject,zero_terminate
-
-/foobar/g
- the foobar thing foobar again\=copy_matched_subject
-
-/(?(VERSION>=0)^B0W)/
- B0W-W0W
-\= Expect no match
- 0
-
-/(?(VERSION>=1000)^B0W|W0W)/
- B0W-W0W
-\= Expect no match
- 0
-
# End of testinput6
diff --git a/dist2/testdata/testoutput1 b/dist2/testdata/testoutput1
index 8fd67b34..77b9ff00 100644
--- a/dist2/testdata/testoutput1
+++ b/dist2/testdata/testoutput1
@@ -9929,124 +9929,4 @@ No match
AbCDe f
No match
-/(*pla:foo).{6}/
- abcfoobarxyz
- 0: foobar
-\= Expect no match
- abcfooba
-No match
-
-/(*positive_lookahead:foo).{6}/
- abcfoobarxyz
- 0: foobar
-
-/(?(*pla:foo).{6}|a..)/
- foobarbaz
- 0: foobar
- abcfoobar
- 0: abc
-
-/(?(*positive_lookahead:foo).{6}|a..)/
- foobarbaz
- 0: foobar
- abcfoobar
- 0: abc
-
-/(*plb:foo)bar/
- abcfoobar
- 0: bar
-\= Expect no match
- abcbarfoo
-No match
-
-/(*positive_lookbehind:foo)bar/
- abcfoobar
- 0: bar
-\= Expect no match
- abcbarfoo
-No match
-
-/(?(*plb:foo)bar|baz)/
- abcfoobar
- 0: bar
- bazfoobar
- 0: baz
- abcbazfoobar
- 0: baz
- foobazfoobar
- 0: bar
-
-/(?(*positive_lookbehind:foo)bar|baz)/
- abcfoobar
- 0: bar
- bazfoobar
- 0: baz
- abcbazfoobar
- 0: baz
- foobazfoobar
- 0: bar
-
-/(*nlb:foo)bar/
- abcbarfoo
- 0: bar
-\= Expect no match
- abcfoobar
-No match
-
-/(*negative_lookbehind:foo)bar/
- abcbarfoo
- 0: bar
-\= Expect no match
- abcfoobar
-No match
-
-/(?(*nlb:foo)bar|baz)/
- abcfoobaz
- 0: baz
- abcbarbaz
- 0: bar
-\= Expect no match
- abcfoobar
-No match
-
-/(?(*negative_lookbehind:foo)bar|baz)/
- abcfoobaz
- 0: baz
- abcbarbaz
- 0: bar
-\= Expect no match
- abcfoobar
-No match
-
-/(*atomic:a+)\w/
- aaab
- 0: aaab
-\= Expect no match
- aaaa
-No match
-
-/ (?<word> \w+ )* \. /xi
- pokus.
- 0: pokus.
- 1: pokus
-
-/(?(DEFINE) (?<word> \w+ ) ) (?&word)* \./xi
- pokus.
- 0: pokus.
-
-/(?(DEFINE) (?<word> \w+ ) ) ( (?&word)* ) \./xi
- pokus.
- 0: pokus.
- 1: <unset>
- 2: pokus
-
-/(?&word)* (?(DEFINE) (?<word> \w+ ) ) \./xi
- pokus.
- 0: pokus.
-
-/(?&word)* \. (?<word> \w+ )/xi
- pokus.hokus
- 0: pokus.hokus
- 1: hokus
-
# End of testinput1
diff --git a/dist2/testdata/testoutput10 b/dist2/testdata/testoutput10
index dfecda1c..9660fc5a 100644
--- a/dist2/testdata/testoutput10
+++ b/dist2/testdata/testoutput10
@@ -248,7 +248,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x80
@@ -261,7 +261,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xe1
Last code unit = \x80
@@ -274,7 +274,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xf0
Last code unit = \x80
@@ -287,7 +287,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xf4
Last code unit = \x80
@@ -300,7 +300,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xf4
Last code unit = \xbf
@@ -313,7 +313,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc3
Last code unit = \xbf
@@ -326,7 +326,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x80
@@ -339,7 +339,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc2
Last code unit = \x80
@@ -352,7 +352,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc3
Last code unit = \xbf
@@ -365,7 +365,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xed
Last code unit = \xb4
@@ -380,7 +380,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xe6
Last code unit = \x9e
@@ -395,7 +395,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc2
Last code unit = \x80
@@ -408,7 +408,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc2
Last code unit = \x84
@@ -421,7 +421,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x84
@@ -434,7 +434,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xe0
Last code unit = \xa1
@@ -447,7 +447,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xf0
Last code unit = \xab
@@ -460,7 +460,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -495,7 +495,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x80
@@ -514,7 +514,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: x \xc4
Subject length lower bound = 1
@@ -531,7 +531,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a x \xc4
Subject length lower bound = 1
@@ -548,7 +548,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a x \xc4
Subject length lower bound = 1
@@ -566,7 +566,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: x \xc4
Subject length lower bound = 1
@@ -578,7 +578,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x80
@@ -592,7 +592,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = \x80
@@ -606,7 +606,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = \x81
@@ -619,7 +619,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[\x{100}]/IB,utf
@@ -629,7 +629,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x80
@@ -648,7 +648,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc3
Last code unit = \xbf
@@ -663,7 +663,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -678,14 +678,14 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
First code unit = \xc4
Last code unit = 'z'
Subject length lower bound = 7
/\777/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc7
Last code unit = \xbf
@@ -703,7 +703,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = \x80
@@ -717,7 +717,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc4
Last code unit = 'X'
@@ -761,7 +761,7 @@ No match
0: \x{1234}
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: utf
\R matches any Unicode newline
@@ -771,7 +771,7 @@ Last code unit = 'b'
Subject length lower bound = 3
/\h/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3
Subject length lower bound = 1
@@ -795,7 +795,7 @@ Subject length lower bound = 1
0: \x{3000}
/\v/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
Subject length lower bound = 1
@@ -813,7 +813,7 @@ Subject length lower bound = 1
0: \x{2028}
/\h*A/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
Last code unit = 'A'
@@ -822,21 +822,21 @@ Subject length lower bound = 1
0: A
/\v+A/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
Last code unit = 'A'
Subject length lower bound = 2
/\s?xxx\s/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
Last code unit = 'x'
Subject length lower bound = 4
/\sxxx\s/I,utf,tables=2
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
Last code unit = 'x'
@@ -847,7 +847,7 @@ Subject length lower bound = 5
0: \x{a0}xxx\x{85}
/\S \S/I,utf,tables=2
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
@@ -883,25 +883,25 @@ Error -36 (bad UTF-8 offset)
No match
/\x{1234}+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xe1
Subject length lower bound = 1
/\x{1234}+?/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xe1
Subject length lower bound = 1
/\x{1234}++/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xe1
Subject length lower bound = 1
/\x{1234}{2}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xe1
Subject length lower bound = 2
@@ -913,7 +913,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -925,14 +925,14 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'X'
Last code unit = \x80
Subject length lower bound = 2
/\R/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
Subject length lower bound = 1
@@ -944,7 +944,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xc7
Last code unit = \xbf
@@ -1105,7 +1105,7 @@ Failed: error 174 at offset 0: using UTF is disabled by the application
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Subject length lower bound = 5
@@ -1117,7 +1117,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = \xb0
@@ -1130,7 +1130,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = \xb0
@@ -1143,14 +1143,14 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Last code unit = 'B' (caseless)
Subject length lower bound = 3
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xd0 \xd1
Subject length lower bound = 17
@@ -1176,17 +1176,17 @@ Subject length lower bound = 17
------------------------------------------------------------------
/\h/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x20 \xa0
Subject length lower bound = 1
/\v/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85
Subject length lower bound = 1
/\R/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85
Subject length lower bound = 1
@@ -1199,7 +1199,7 @@ Subject length lower bound = 1
------------------------------------------------------------------
/\x{212a}+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: K k \xe2
Subject length lower bound = 1
@@ -1207,7 +1207,7 @@ Subject length lower bound = 1
0: KKkk\x{212a}
/s+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: S s \xc5
Subject length lower bound = 1
@@ -1222,7 +1222,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: A \xc4
Last code unit = 'A'
@@ -1239,7 +1239,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
Subject length lower bound = 1
@@ -1251,7 +1251,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: Z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
\xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
@@ -1273,7 +1273,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9
\xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8
@@ -1289,7 +1289,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: - ] a d z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
@@ -1314,7 +1314,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a b \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
\xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
@@ -1332,7 +1332,7 @@ Subject length lower bound = 7
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4
Subject length lower bound = 1
@@ -1345,7 +1345,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
Subject length lower bound = 1
@@ -1358,7 +1358,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -1373,7 +1373,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -1395,7 +1395,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
@@ -1416,7 +1416,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -1435,7 +1435,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
@@ -1462,7 +1462,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8
\xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7
@@ -1503,7 +1503,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8
\xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7
@@ -1520,7 +1520,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xce \xcf
Last code unit = 'B' (caseless)
@@ -1531,7 +1531,7 @@ Subject length lower bound = 2
Failed: error -3: UTF-8 error: 1 byte missing at end
/(?<=(a)(?-1))x/I,utf
-Capture group count = 1
+Capturing subpattern count = 1
Max lookbehind = 2
Options: utf
First code unit = 'x'
@@ -1579,7 +1579,7 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
# but subjects containing them must not be UTF-checked.
/\x{d800}/I,utf,allow_surrogate_escapes
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Extra options: allow_surrogate_escapes
First code unit = \xed
@@ -1602,7 +1602,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1625,30 +1625,5 @@ Subject length lower bound = 1
\= Expect no match
aaa
No match
-
-# Offsets are different in 8-bit mode.
-
-/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
- 1(2) Old 6 6 "" New 6 8 "<>"
- 2(2) Old 13 13 "" New 15 17 "<>"
- 3(2) Old 13 16 "def" New 17 22 "<def>"
- 4(2) Old 22 22 "" New 28 30 "<>"
- 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
-
-# Check name length with non-ASCII characters
-
-/(?'ABáC678901234567890123456789012'...)/utf
-
-/(?'ABáC6789012345678901234567890123'...)/utf
-Failed: error 148 at offset 36: subpattern name is too long (maximum 32 code units)
-
-/(?'ABZC6789012345678901234567890123'...)/utf
-
-/(?(n/utf
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
-
-/(?(á/utf
-Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
# End of testinput10
diff --git a/dist2/testdata/testoutput11-16 b/dist2/testdata/testoutput11-16
index 78bf7fbf..e22581d6 100644
--- a/dist2/testdata/testoutput11-16
+++ b/dist2/testdata/testoutput11-16
@@ -13,11 +13,11 @@
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/\x{100}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{100}
Subject length lower bound = 1
@@ -215,7 +215,7 @@ Subject length lower bound = 1
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Options: extended
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
@@ -260,7 +260,7 @@ Subject length lower bound = 3
------------------------------------------------------------------
/\h+/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x20 \xa0 \xff
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
@@ -275,7 +275,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x20 \xa0 \xff
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
@@ -284,7 +284,7 @@ Subject length lower bound = 1
0: \x{200a}\xa0\x{2000}
/\H+/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
@@ -306,7 +306,7 @@ Subject length lower bound = 1
0: \x9f\xa1\x{2fff}\x{3001}
/\v+/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
@@ -321,7 +321,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
@@ -330,7 +330,7 @@ Subject length lower bound = 1
0: \x85\x0a\x0b\x0c\x0d
/\V+/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
@@ -344,7 +344,7 @@ Subject length lower bound = 1
0: \x09\x0e\x84\x86
/\R+/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
@@ -354,7 +354,7 @@ Subject length lower bound = 1
0: \x85\x0a\x0b\x0c\x0d
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{d800}
Last code unit = \x{dd00}
Subject length lower bound = 6
@@ -600,7 +600,7 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
@@ -624,7 +624,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
diff --git a/dist2/testdata/testoutput11-32 b/dist2/testdata/testoutput11-32
index 4b00384f..d8a909fc 100644
--- a/dist2/testdata/testoutput11-32
+++ b/dist2/testdata/testoutput11-32
@@ -13,11 +13,11 @@
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/\x{100}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{100}
Subject length lower bound = 1
@@ -215,7 +215,7 @@ Subject length lower bound = 1
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Options: extended
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
@@ -260,7 +260,7 @@ Subject length lower bound = 3
------------------------------------------------------------------
/\h+/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x20 \xa0 \xff
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
@@ -275,7 +275,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x20 \xa0 \xff
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
@@ -284,7 +284,7 @@ Subject length lower bound = 1
0: \x{200a}\xa0\x{2000}
/\H+/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
@@ -306,7 +306,7 @@ Subject length lower bound = 1
0: \x9f\xa1\x{2fff}\x{3001}
/\v+/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
@@ -321,7 +321,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
@@ -330,7 +330,7 @@ Subject length lower bound = 1
0: \x85\x0a\x0b\x0c\x0d
/\V+/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
@@ -344,7 +344,7 @@ Subject length lower bound = 1
0: \x09\x0e\x84\x86
/\R+/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
@@ -354,7 +354,7 @@ Subject length lower bound = 1
0: \x85\x0a\x0b\x0c\x0d
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{d800}
Last code unit = \x{dd00}
Subject length lower bound = 6
@@ -558,19 +558,19 @@ Failed: error 134 at offset 12: character code point value in \x{} or \o{} is to
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\x{7fffffff}\x{7fffffff}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{7fffffff}
Last code unit = \x{7fffffff}
Subject length lower bound = 2
/\x{80000000}\x{80000000}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{80000000}
Last code unit = \x{80000000}
Subject length lower bound = 2
/\x{ffffffff}\x{ffffffff}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \x{ffffffff}
Last code unit = \x{ffffffff}
Subject length lower bound = 2
@@ -588,7 +588,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
First code unit = \x{400000}
Last code unit = \x{800000}
@@ -603,7 +603,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
@@ -627,7 +627,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
diff --git a/dist2/testdata/testoutput12-16 b/dist2/testdata/testoutput12-16
index 3f2c91fe..52dbe74b 100644
--- a/dist2/testdata/testoutput12-16
+++ b/dist2/testdata/testoutput12-16
@@ -18,7 +18,7 @@
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{ffff}
Subject length lower bound = 1
@@ -30,7 +30,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d800}
Last code unit = \x{dc00}
@@ -43,7 +43,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -55,7 +55,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{1000}
Subject length lower bound = 1
@@ -67,7 +67,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d800}
Last code unit = \x{dc00}
@@ -80,7 +80,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{dbc0}
Last code unit = \x{dc00}
@@ -93,7 +93,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{dbff}
Last code unit = \x{dfff}
@@ -106,7 +106,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xff
Subject length lower bound = 1
@@ -118,7 +118,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -130,7 +130,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x80
Subject length lower bound = 1
@@ -142,7 +142,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xff
Subject length lower bound = 1
@@ -154,7 +154,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -169,7 +169,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -184,7 +184,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x80
Subject length lower bound = 1
@@ -196,7 +196,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x84
Subject length lower bound = 1
@@ -208,7 +208,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{104}
Subject length lower bound = 1
@@ -220,7 +220,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{861}
Subject length lower bound = 1
@@ -232,7 +232,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d844}
Last code unit = \x{deab}
@@ -245,7 +245,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -281,7 +281,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Last code unit = \x{100}
@@ -300,7 +300,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: x \xff
Subject length lower bound = 1
@@ -317,7 +317,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a x \xff
Subject length lower bound = 1
@@ -334,7 +334,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a x \xff
Subject length lower bound = 1
@@ -352,7 +352,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: x \xff
Subject length lower bound = 1
@@ -364,7 +364,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -377,7 +377,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = \x{100}
@@ -391,7 +391,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = \x{101}
@@ -404,7 +404,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[\x{100}]/IB,utf
@@ -414,7 +414,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -432,7 +432,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xff
Subject length lower bound = 1
@@ -446,7 +446,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -461,14 +461,14 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
First code unit = \x{100}
Last code unit = 'z'
Subject length lower bound = 7
/\777/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{1ff}
Subject length lower bound = 1
@@ -485,7 +485,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Last code unit = \x{200}
@@ -499,7 +499,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Last code unit = 'X'
@@ -547,7 +547,7 @@ Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
0: \x{11234}
/(*UTF)\x{11234}/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: utf
First code unit = \x{d804}
@@ -565,7 +565,7 @@ Failed: error 160 at offset 5: (*VERB) not recognized or malformed
abcd\x{11234}pqr
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: utf
\R matches any Unicode newline
@@ -575,10 +575,10 @@ Last code unit = 'b'
Subject length lower bound = 3
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
-Failed: error 160 at offset 14: (*VERB) not recognized or malformed
+Failed: error 160 at offset 12: (*VERB) not recognized or malformed
/\h/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x20 \xa0 \xff
Subject length lower bound = 1
@@ -602,7 +602,7 @@ Subject length lower bound = 1
0: \x{3000}
/\v/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
@@ -620,7 +620,7 @@ Subject length lower bound = 1
0: \x{2028}
/\h*A/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x20 A \xa0 \xff
Last code unit = 'A'
@@ -631,7 +631,7 @@ Subject length lower bound = 1
0: \x{2000}A
/\R*A/I,bsr=unicode,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches any Unicode newline
Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
@@ -643,21 +643,21 @@ Subject length lower bound = 1
0: \x{2028}A
/\v+A/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Last code unit = 'A'
Subject length lower bound = 2
/\s?xxx\s/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
Last code unit = 'x'
Subject length lower bound = 4
/\sxxx\s/I,utf,tables=2
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
Last code unit = 'x'
@@ -668,7 +668,7 @@ Subject length lower bound = 5
0: \x{a0}xxx\x{85}
/\S \S/I,utf,tables=2
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
@@ -708,25 +708,25 @@ Failed: error -33: bad offset value
Failed: error -33: bad offset value
/\x{1234}+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Subject length lower bound = 1
/\x{1234}+?/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Subject length lower bound = 1
/\x{1234}++/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Subject length lower bound = 1
/\x{1234}{2}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Last code unit = \x{1234}
@@ -739,7 +739,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -751,14 +751,14 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'X'
Last code unit = \x{200}
Subject length lower bound = 2
/\R/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
@@ -936,7 +936,7 @@ Failed: error 174 at offset 0: using UTF is disabled by the application
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Last code unit = \x{1fb0} (caseless)
@@ -949,7 +949,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = \x{1fb0}
@@ -962,7 +962,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = \x{1fb0}
@@ -975,14 +975,14 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Last code unit = \x{1fb0} (caseless)
Subject length lower bound = 3
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{401} (caseless)
Last code unit = \x{42f} (caseless)
@@ -1017,7 +1017,7 @@ Subject length lower bound = 17
------------------------------------------------------------------
/\x{212a}+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: K k \xff
Subject length lower bound = 1
@@ -1025,7 +1025,7 @@ Subject length lower bound = 1
0: KKkk\x{212a}
/s+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: S s \xff
Subject length lower bound = 1
@@ -1048,7 +1048,7 @@ Failed: error 134 at offset 10: character code point value in \x{} or \o{} is to
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: A \xff
Last code unit = 'A'
@@ -1065,7 +1065,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
Subject length lower bound = 1
@@ -1077,7 +1077,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: Z \xff
Subject length lower bound = 1
@@ -1095,7 +1095,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
\x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
@@ -1115,7 +1115,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: - ] a d z \xff
Subject length lower bound = 1
@@ -1136,7 +1136,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a b \xff
Last code unit = 'z'
@@ -1150,7 +1150,7 @@ Subject length lower bound = 7
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
Subject length lower bound = 1
@@ -1163,7 +1163,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
Subject length lower bound = 1
@@ -1176,7 +1176,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -1191,7 +1191,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -1217,7 +1217,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
@@ -1243,7 +1243,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -1266,7 +1266,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xff
Subject length lower bound = 1
@@ -1289,7 +1289,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
\x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
@@ -1335,7 +1335,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
\x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
@@ -1357,7 +1357,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xff
Last code unit = 'B' (caseless)
@@ -1443,7 +1443,7 @@ Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowe
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1470,36 +1470,5 @@ Subject length lower bound = 1
\= Expect no match
aaa
No match
-
-# Offsets are different in 8-bit mode.
-
-/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
- 1(2) Old 6 6 "" New 6 8 "<>"
- 2(2) Old 12 12 "" New 14 16 "<>"
- 3(2) Old 12 15 "def" New 16 21 "<def>"
- 4(2) Old 21 21 "" New 27 29 "<>"
- 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
-
-# A few script run tests in non-UTF mode (but they need Unicode support)
-
-/^(*script_run:.{4})/
- \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
- 0: \x{3041}\x{30a1}\x{3007}\x{3007}
- \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
- 0: \x{30a1}\x{3041}\x{3007}\x{3007}
- \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
- 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
-
-/^(*sr:.*)/utf,allow_surrogate_escapes
-Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check
-
-/(?(n/utf
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
-
-/(?(á/utf
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
# End of testinput12
diff --git a/dist2/testdata/testoutput12-32 b/dist2/testdata/testoutput12-32
index cb901907..38ff92dc 100644
--- a/dist2/testdata/testoutput12-32
+++ b/dist2/testdata/testoutput12-32
@@ -18,7 +18,7 @@
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{ffff}
Subject length lower bound = 1
@@ -30,7 +30,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{10000}
Subject length lower bound = 1
@@ -42,7 +42,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -54,7 +54,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{1000}
Subject length lower bound = 1
@@ -66,7 +66,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{10000}
Subject length lower bound = 1
@@ -78,7 +78,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100000}
Subject length lower bound = 1
@@ -90,7 +90,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{10ffff}
Subject length lower bound = 1
@@ -102,7 +102,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xff
Subject length lower bound = 1
@@ -114,7 +114,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -126,7 +126,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x80
Subject length lower bound = 1
@@ -138,7 +138,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xff
Subject length lower bound = 1
@@ -150,7 +150,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -165,7 +165,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -180,7 +180,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x80
Subject length lower bound = 1
@@ -192,7 +192,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x84
Subject length lower bound = 1
@@ -204,7 +204,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{104}
Subject length lower bound = 1
@@ -216,7 +216,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{861}
Subject length lower bound = 1
@@ -228,7 +228,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{212ab}
Subject length lower bound = 1
@@ -240,7 +240,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -276,7 +276,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Last code unit = \x{100}
@@ -295,7 +295,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: x \xff
Subject length lower bound = 1
@@ -312,7 +312,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a x \xff
Subject length lower bound = 1
@@ -329,7 +329,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a x \xff
Subject length lower bound = 1
@@ -347,7 +347,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: x \xff
Subject length lower bound = 1
@@ -359,7 +359,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -372,7 +372,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = \x{100}
@@ -386,7 +386,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = \x{101}
@@ -399,7 +399,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[\x{100}]/IB,utf
@@ -409,7 +409,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Subject length lower bound = 1
@@ -427,7 +427,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xff
Subject length lower bound = 1
@@ -441,7 +441,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -456,14 +456,14 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
First code unit = \x{100}
Last code unit = 'z'
Subject length lower bound = 7
/\777/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{1ff}
Subject length lower bound = 1
@@ -480,7 +480,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Last code unit = \x{200}
@@ -494,7 +494,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{100}
Last code unit = 'X'
@@ -538,11 +538,11 @@ No match
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
/(*UTF16)\x{11234}/
-Failed: error 160 at offset 7: (*VERB) not recognized or malformed
+Failed: error 160 at offset 5: (*VERB) not recognized or malformed
abcd\x{11234}pqr
/(*UTF)\x{11234}/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: utf
First code unit = \x{11234}
@@ -559,10 +559,10 @@ Failed: error 160 at offset 5: (*VERB) not recognized or malformed
abcd\x{11234}pqr
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
-Failed: error 160 at offset 14: (*VERB) not recognized or malformed
+Failed: error 160 at offset 12: (*VERB) not recognized or malformed
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: utf
\R matches any Unicode newline
@@ -572,7 +572,7 @@ Last code unit = 'b'
Subject length lower bound = 3
/\h/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x20 \xa0 \xff
Subject length lower bound = 1
@@ -596,7 +596,7 @@ Subject length lower bound = 1
0: \x{3000}
/\v/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
@@ -614,7 +614,7 @@ Subject length lower bound = 1
0: \x{2028}
/\h*A/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x20 A \xa0 \xff
Last code unit = 'A'
@@ -625,7 +625,7 @@ Subject length lower bound = 1
0: \x{2000}A
/\R*A/I,bsr=unicode,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches any Unicode newline
Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
@@ -637,21 +637,21 @@ Subject length lower bound = 1
0: \x{2028}A
/\v+A/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Last code unit = 'A'
Subject length lower bound = 2
/\s?xxx\s/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
Last code unit = 'x'
Subject length lower bound = 4
/\sxxx\s/I,utf,tables=2
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
Last code unit = 'x'
@@ -662,7 +662,7 @@ Subject length lower bound = 5
0: \x{a0}xxx\x{85}
/\S \S/I,utf,tables=2
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
@@ -702,25 +702,25 @@ Failed: error -33: bad offset value
Failed: error -33: bad offset value
/\x{1234}+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Subject length lower bound = 1
/\x{1234}+?/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Subject length lower bound = 1
/\x{1234}++/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Subject length lower bound = 1
/\x{1234}{2}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{1234}
Last code unit = \x{1234}
@@ -733,7 +733,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -745,14 +745,14 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'X'
Last code unit = \x{200}
Subject length lower bound = 2
/\R/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
Subject length lower bound = 1
@@ -930,7 +930,7 @@ Failed: error 174 at offset 0: using UTF is disabled by the application
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Last code unit = \x{1fb0} (caseless)
@@ -943,7 +943,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = \x{1fb0}
@@ -956,7 +956,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = \x{1fb0}
@@ -969,14 +969,14 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Last code unit = \x{1fb0} (caseless)
Subject length lower bound = 3
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = \x{401} (caseless)
Last code unit = \x{42f} (caseless)
@@ -1011,7 +1011,7 @@ Subject length lower bound = 17
------------------------------------------------------------------
/\x{212a}+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: K k \xff
Subject length lower bound = 1
@@ -1019,7 +1019,7 @@ Subject length lower bound = 1
0: KKkk\x{212a}
/s+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: S s \xff
Subject length lower bound = 1
@@ -1042,7 +1042,7 @@ Failed: error 134 at offset 10: character code point value in \x{} or \o{} is to
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: A \xff
Last code unit = 'A'
@@ -1059,7 +1059,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
Subject length lower bound = 1
@@ -1071,7 +1071,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: Z \xff
Subject length lower bound = 1
@@ -1089,7 +1089,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
\x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
@@ -1109,7 +1109,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: - ] a d z \xff
Subject length lower bound = 1
@@ -1130,7 +1130,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Starting code units: a b \xff
Last code unit = 'z'
@@ -1144,7 +1144,7 @@ Subject length lower bound = 7
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
Subject length lower bound = 1
@@ -1157,7 +1157,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
Subject length lower bound = 1
@@ -1170,7 +1170,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -1185,7 +1185,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -1211,7 +1211,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
@@ -1237,7 +1237,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
@@ -1260,7 +1260,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xff
Subject length lower bound = 1
@@ -1283,7 +1283,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
\x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
@@ -1329,7 +1329,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
\x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
@@ -1351,7 +1351,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Starting code units: \xff
Last code unit = 'B' (caseless)
@@ -1418,7 +1418,7 @@ No match
# errors in 16-bit mode.
/\x{d800}/I,utf,allow_surrogate_escapes
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Extra options: allow_surrogate_escapes
First code unit = \x{d800}
@@ -1440,7 +1440,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1467,37 +1467,5 @@ Subject length lower bound = 1
\= Expect no match
aaa
No match
-
-# Offsets are different in 8-bit mode.
-
-/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
- 123abcáyzabcdef789abcሴqr
- 1(2) Old 6 6 "" New 6 8 "<>"
- 2(2) Old 12 12 "" New 14 16 "<>"
- 3(2) Old 12 15 "def" New 16 21 "<def>"
- 4(2) Old 21 21 "" New 27 29 "<>"
- 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
-
-# A few script run tests in non-UTF mode (but they need Unicode support)
-
-/^(*script_run:.{4})/
- \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
- 0: \x{3041}\x{30a1}\x{3007}\x{3007}
- \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
- 0: \x{30a1}\x{3041}\x{3007}\x{3007}
- \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
- 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
-
-/^(*sr:.*)/utf,allow_surrogate_escapes
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- 0: \x{2e80}\x{3105}\x{2e80}
- \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check
- 0: \x{d800}
-
-/(?(n/utf
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
-
-/(?(á/utf
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
# End of testinput12
diff --git a/dist2/testdata/testoutput15 b/dist2/testdata/testoutput15
index d312765b..d09e781c 100644
--- a/dist2/testdata/testoutput15
+++ b/dist2/testdata/testoutput15
@@ -7,7 +7,7 @@
# (2) Other tests that must not be run with JIT.
/(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
@@ -24,7 +24,7 @@ Minimum depth limit = 30
No match
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
/* this is a C style comment */\=find_limits
@@ -117,7 +117,7 @@ Failed: error 160 at offset 17: (*VERB) not recognized or malformed
Failed: error 160 at offset 24: (*VERB) not recognized or malformed
/(*LIMIT_DEPTH=4294967280)abc/I
-Capture group count = 0
+Capturing subpattern count = 0
Depth limit = 4294967280
First code unit = 'a'
Last code unit = 'c'
@@ -137,7 +137,7 @@ Failed: error -47: match limit exceeded
Failed: error -53: matching depth limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Match limit = 3000
Starting code units: a z
Last code unit = 'z'
@@ -150,7 +150,7 @@ Failed: error -47: match limit exceeded
Failed: error -47: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Match limit = 3000
Starting code units: a z
Last code unit = 'z'
@@ -160,7 +160,7 @@ Subject length lower bound = 2
Failed: error -47: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Match limit = 60000
Starting code units: a z
Last code unit = 'z'
@@ -173,7 +173,7 @@ No match
Failed: error -47: match limit exceeded
/(*LIMIT_DEPTH=10)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Depth limit = 10
Starting code units: a z
Last code unit = 'z'
@@ -186,7 +186,7 @@ Failed: error -53: matching depth limit exceeded
Failed: error -53: matching depth limit exceeded
/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Depth limit = 1000
Starting code units: a z
Last code unit = 'z'
@@ -196,7 +196,7 @@ Subject length lower bound = 2
No match
/(*LIMIT_DEPTH=1000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Depth limit = 1000
Starting code units: a z
Last code unit = 'z'
@@ -269,14 +269,14 @@ Failed: error -52: nested recursion at the same subject position
# when JIT is used.
/(?R)/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
abcd
Failed: error -52: nested recursion at the same subject position
/(a|(?R))/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
abcd
@@ -286,7 +286,7 @@ Subject length lower bound = 0
Failed: error -52: nested recursion at the same subject position
/(ab|(bc|(de|(?R))))/I
-Capture group count = 3
+Capturing subpattern count = 3
May match empty string
Subject length lower bound = 0
abcd
@@ -296,7 +296,7 @@ Subject length lower bound = 0
Failed: error -52: nested recursion at the same subject position
/(ab|(bc|(de|(?1))))/I
-Capture group count = 3
+Capturing subpattern count = 3
May match empty string
Subject length lower bound = 0
abcd
@@ -306,7 +306,7 @@ Subject length lower bound = 0
Failed: error -52: nested recursion at the same subject position
/x(ab|(bc|(de|(?1)x)x)x)/I
-Capture group count = 3
+Capturing subpattern count = 3
First code unit = 'x'
Subject length lower bound = 3
xab123
@@ -352,7 +352,7 @@ Failed: error -52: nested recursion at the same subject position
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Subject length lower bound = 1
abcd
Failed: error -52: nested recursion at the same subject position
@@ -367,7 +367,7 @@ Failed: error -52: nested recursion at the same subject position
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: no_auto_possess
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -390,7 +390,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: no_auto_possess
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
diff --git a/dist2/testdata/testoutput16 b/dist2/testdata/testoutput16
index 78d43bda..616567b5 100644
--- a/dist2/testdata/testoutput16
+++ b/dist2/testdata/testoutput16
@@ -3,14 +3,14 @@
# are different without JIT.
/abc/I,jit,jitverify
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
JIT support is not available in this version of PCRE2
/a*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput17 b/dist2/testdata/testoutput17
index b66cfa32..acf00e09 100644
--- a/dist2/testdata/testoutput17
+++ b/dist2/testdata/testoutput17
@@ -6,7 +6,7 @@
# JIT does not support this pattern (callout at start of condition).
/(?(?C1)(?=a)a)/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
JIT compilation was not successful (no more memory)
@@ -14,7 +14,7 @@ JIT compilation was not successful (no more memory)
# The following pattern cannot be compiled by JIT.
/b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
JIT compilation was not successful (no more memory)
@@ -26,7 +26,7 @@ JIT compilation was not successful (no more memory)
Failed: error -46: JIT stack limit reached
/abcd/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 4
@@ -38,7 +38,7 @@ JIT compilation was successful
No match (JIT)
/(*NO_JIT)abcd/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 4
@@ -174,7 +174,7 @@ Partial match: ab (JIT)
No match (JIT)
/abcd/I,jit=2
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 4
@@ -192,7 +192,7 @@ No match, mark = m (JIT)
# Limits tests that give different output with JIT.
/(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
@@ -207,7 +207,7 @@ Minimum match limit = 16383
No match (JIT)
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
JIT compilation was successful
@@ -294,7 +294,7 @@ No match (JIT)
Failed: error -47: match limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Match limit = 3000
Starting code units: a z
Last code unit = 'z'
@@ -308,7 +308,7 @@ Failed: error -47: match limit exceeded
Failed: error -47: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Match limit = 3000
Starting code units: a z
Last code unit = 'z'
@@ -319,7 +319,7 @@ JIT compilation was successful
Failed: error -47: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I
-Capture group count = 1
+Capturing subpattern count = 1
Match limit = 60000
Starting code units: a z
Last code unit = 'z'
@@ -371,7 +371,7 @@ No match
/^abc\Kdef/info,push
** Applies only to compile when pattern is stacked with 'push': jitverify
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -383,7 +383,7 @@ JIT compilation was successful
/^abc\Kdef/info,push
** Applies only to compile when pattern is stacked with 'push': jitverify
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -423,7 +423,7 @@ JIT compilation was successful
# when JIT is used.
/(?R)/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
JIT compilation was successful
@@ -431,7 +431,7 @@ JIT compilation was successful
Failed: error -46: JIT stack limit reached
/(a|(?R))/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
JIT compilation was successful
@@ -442,7 +442,7 @@ JIT compilation was successful
Failed: error -46: JIT stack limit reached
/(ab|(bc|(de|(?R))))/I
-Capture group count = 3
+Capturing subpattern count = 3
May match empty string
Subject length lower bound = 0
JIT compilation was successful
@@ -453,7 +453,7 @@ JIT compilation was successful
Failed: error -46: JIT stack limit reached
/(ab|(bc|(de|(?1))))/I
-Capture group count = 3
+Capturing subpattern count = 3
May match empty string
Subject length lower bound = 0
JIT compilation was successful
@@ -464,7 +464,7 @@ JIT compilation was successful
Failed: error -46: JIT stack limit reached
/x(ab|(bc|(de|(?1)x)x)x)/I
-Capture group count = 3
+Capturing subpattern count = 3
First code unit = 'x'
Subject length lower bound = 3
JIT compilation was successful
@@ -511,7 +511,7 @@ Failed: error -46: JIT stack limit reached
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Subject length lower bound = 1
JIT compilation was successful
abcd
@@ -543,11 +543,11 @@ Failed: error -47: match limit exceeded
# ----
/[aC]/mg,firstline,newline=lf
- match\nmatch
+match\nmatch
0: a (JIT)
/[aCz]/mg,firstline,newline=lf
- match\nmatch
+match\nmatch
0: a (JIT)
# End of testinput17
diff --git a/dist2/testdata/testoutput2 b/dist2/testdata/testoutput2
index 2f91c389..ecf0d800 100644
--- a/dist2/testdata/testoutput2
+++ b/dist2/testdata/testoutput2
@@ -47,12 +47,12 @@
0: DE
/(a)b|/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
/abc/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
@@ -69,7 +69,7 @@ No match
No match
/^abc/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -85,30 +85,30 @@ No match
No match
/a+bc/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
/a*bc/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Last code unit = 'c'
Subject length lower bound = 2
/a{3}bc/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 5
/(abc|a+z)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 2
/^abc$/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -135,13 +135,13 @@ Failed: error 105 at offset 7: number too big in {} quantifier
Failed: error 106 at offset 5: missing terminating ] for character class
/[\B]/B
-Failed: error 107 at offset 2: escape sequence is invalid in character class
+Failed: error 107 at offset 2: invalid escape sequence in character class
/[\R]/B
-Failed: error 107 at offset 2: escape sequence is invalid in character class
+Failed: error 107 at offset 2: invalid escape sequence in character class
/[\X]/B
-Failed: error 107 at offset 2: escape sequence is invalid in character class
+Failed: error 107 at offset 2: invalid escape sequence in character class
/[z-a]/
Failed: error 108 at offset 3: range out of order in character class
@@ -159,19 +159,19 @@ Failed: error 118 at offset 7: missing ) after (?# comment
Failed: error 111 at offset 2: unrecognized character after (? or (?-
/.*b/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit at start or follows newline
Last code unit = 'b'
Subject length lower bound = 1
/.*?b/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit at start or follows newline
Last code unit = 'b'
Subject length lower bound = 1
/cat|dog|elephant/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: c d e
Subject length lower bound = 3
this sentence eventually mentions a cat
@@ -180,7 +180,7 @@ Subject length lower bound = 3
0: elephant
/cat|dog|elephant/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: c d e
Subject length lower bound = 3
this sentence eventually mentions a cat
@@ -189,7 +189,7 @@ Subject length lower bound = 3
0: elephant
/cat|dog|elephant/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
Starting code units: C D E c d e
Subject length lower bound = 3
@@ -199,12 +199,12 @@ Subject length lower bound = 3
0: elephant
/a|[bcd]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c d
Subject length lower bound = 1
/(a|[^\dZ])/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
@@ -222,7 +222,7 @@ Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
Subject length lower bound = 1
/(a|b)*[\s]/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b
Subject length lower bound = 1
@@ -233,7 +233,7 @@ Failed: error 115 at offset 4: reference to non-existent subpattern
Failed: error 109 at offset 4: quantifier does not follow a repeatable item
/(a)(b)(c)\2/I
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 2
First code unit = 'a'
Last code unit = 'c'
@@ -267,7 +267,7 @@ Matched, but too many substrings
3: c
/(a)bc|(a)(b)\2/I
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 2
First code unit = 'a'
Subject length lower bound = 3
@@ -312,7 +312,7 @@ Matched, but too many substrings
3: b
/abc$/I,dollar_endonly
-Capture group count = 0
+Capturing subpattern count = 0
Options: dollar_endonly
First code unit = 'a'
Last code unit = 'c'
@@ -329,7 +329,7 @@ No match
Failed: error 115 at offset 16: reference to non-existent subpattern
/the quick brown fox/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 't'
Last code unit = 'x'
Subject length lower bound = 19
@@ -339,7 +339,7 @@ Subject length lower bound = 19
0: the quick brown fox
/the quick brown fox/I,anchored
-Capture group count = 0
+Capturing subpattern count = 0
Options: anchored
First code unit = 't'
Subject length lower bound = 19
@@ -353,7 +353,7 @@ No match
Failed: error 111 at offset 4: unrecognized character after (? or (?-
/^abc|def/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a d
Subject length lower bound = 3
abcdef
@@ -362,7 +362,7 @@ Subject length lower bound = 3
0: def
/.*((abc)$|(def))/I
-Capture group count = 3
+Capturing subpattern count = 3
First code unit at start or follows newline
Subject length lower bound = 3
defabc
@@ -382,7 +382,7 @@ Failed: error 122 at offset 0: unmatched closing parenthesis
Failed: error 106 at offset 4: missing terminating ] for character class
/[^aeiou ]{3,}/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6
@@ -402,7 +402,7 @@ Subject length lower bound = 3
0: -pr
/<.*>/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = '<'
Last code unit = '>'
Subject length lower bound = 2
@@ -410,7 +410,7 @@ Subject length lower bound = 2
0: <def>ghi<klm>
/<.*?>/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = '<'
Last code unit = '>'
Subject length lower bound = 2
@@ -418,7 +418,7 @@ Subject length lower bound = 2
0: <def>
/<.*>/I,ungreedy
-Capture group count = 0
+Capturing subpattern count = 0
Options: ungreedy
First code unit = '<'
Last code unit = '>'
@@ -427,7 +427,7 @@ Subject length lower bound = 2
0: <def>
/(?U)<.*>/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = '<'
Last code unit = '>'
Subject length lower bound = 2
@@ -435,7 +435,7 @@ Subject length lower bound = 2
0: <def>
/<.*?>/I,ungreedy
-Capture group count = 0
+Capturing subpattern count = 0
Options: ungreedy
First code unit = '<'
Last code unit = '>'
@@ -444,7 +444,7 @@ Subject length lower bound = 2
0: <def>ghi<klm>
/={3,}/I,ungreedy
-Capture group count = 0
+Capturing subpattern count = 0
Options: ungreedy
First code unit = '='
Last code unit = '='
@@ -453,7 +453,7 @@ Subject length lower bound = 3
0: ===
/(?U)={3,}?/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = '='
Last code unit = '='
Subject length lower bound = 3
@@ -461,7 +461,7 @@ Subject length lower bound = 3
0: ========
/(?<!bar|cattle)foo/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 6
First code unit = 'f'
Last code unit = 'o'
@@ -485,69 +485,69 @@ Failed: error 125 at offset 5: lookbehind assertion is not fixed length
/(?<!(foo)a\1)bar/
/(?i)abc/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
/(a|(?m)a)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/(?i)^1234/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = '1'
Subject length lower bound = 4
/(^b|(?i)^d)/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
Starting code units: D b d
Subject length lower bound = 1
/(?s).*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Compile options: <none>
Overall options: anchored
Subject length lower bound = 0
/[abcd]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c d
Subject length lower bound = 1
/(?i)[abcd]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: A B C D a b c d
Subject length lower bound = 1
/(?m)[xy]|(b|c)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: b c x y
Subject length lower bound = 1
/(^a|^b)/Im
-Capture group count = 1
+Capturing subpattern count = 1
Options: multiline
First code unit at start or follows newline
Subject length lower bound = 1
/(?i)(^a|^b)/Im
-Capture group count = 1
+Capturing subpattern count = 1
Options: multiline
First code unit at start or follows newline
Subject length lower bound = 1
/(a)(?(1)a|b|c)/
-Failed: error 127 at offset 3: conditional subpattern contains more than two branches
+Failed: error 127 at offset 3: conditional group contains more than two branches
/(?(?=a)a|b|c)/
-Failed: error 127 at offset 0: conditional subpattern contains more than two branches
+Failed: error 127 at offset 0: conditional group contains more than two branches
/(?(1a)/
Failed: error 124 at offset 4: missing closing parenthesis for condition
@@ -565,14 +565,14 @@ Failed: error 115 at offset 3: reference to non-existent subpattern
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
/((?s)blah)\s+\1/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'b'
Last code unit = 'h'
Subject length lower bound = 9
/((?i)blah)\s+\1/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'b' (caseless)
Last code unit = 'h' (caseless)
@@ -587,17 +587,17 @@ Subject length lower bound = 9
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'b' (caseless)
Subject length lower bound = 1
/(a*b|(?i:c*(?-i)d))/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: C a b c d
Subject length lower bound = 1
/a$/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
a
@@ -611,7 +611,7 @@ No match
No match
/a$/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
First code unit = 'a'
Subject length lower bound = 1
@@ -626,7 +626,7 @@ Subject length lower bound = 1
No match
/\Aabc/Im
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
Compile options: multiline
Overall options: anchored multiline
@@ -634,14 +634,14 @@ First code unit = 'a'
Subject length lower bound = 3
/^abc/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
First code unit at start or follows newline
Last code unit = 'c'
Subject length lower bound = 3
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/I
-Capture group count = 5
+Capturing subpattern count = 5
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -655,25 +655,25 @@ Subject length lower bound = 3
5: def
/(?<=foo)[ab]/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 3
Starting code units: a b
Subject length lower bound = 1
/(?<!foo)(alpha|omega)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max lookbehind = 3
Starting code units: a o
Last code unit = 'a'
Subject length lower bound = 5
/(?!alphabet)[ab]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Subject length lower bound = 1
/(?<=foo\n)^bar/Im
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 4
Contains explicit CR or LF match
Options: multiline
@@ -690,7 +690,7 @@ No match
No match
/^(?<=foo\n)bar/Im
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 4
Contains explicit CR or LF match
Options: multiline
@@ -708,7 +708,7 @@ No match
No match
/(?>^abc)/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
First code unit at start or follows newline
Last code unit = 'c'
@@ -733,7 +733,7 @@ Failed: error 125 at offset 0: lookbehind assertion is not fixed length
/The next three are in testinput2 because they have variable length branches/
/(?<=bullock|donkey)-cart/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 7
First code unit = '-'
Last code unit = 't'
@@ -749,13 +749,13 @@ No match
No match
/(?<=ab(?i)x|y|z)/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 3
May match empty string
Subject length lower bound = 0
/(?>.*)(?<=(abcd)|(xyz))/I
-Capture group count = 2
+Capturing subpattern count = 2
Max lookbehind = 4
May match empty string
Subject length lower bound = 0
@@ -768,7 +768,7 @@ Subject length lower bound = 0
2: xyz
/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 4
First code unit = 'Z'
Last code unit = 'Z'
@@ -796,7 +796,7 @@ No match
No match
/(?<!(foo)a)bar/I
-Capture group count = 1
+Capturing subpattern count = 1
Max lookbehind = 4
First code unit = 'b'
Last code unit = 'r'
@@ -812,7 +812,7 @@ No match
# This one is here because Perl behaves differently; see also the following.
/^(a\1?){4}$/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Compile options: <none>
Overall options: anchored
@@ -949,7 +949,7 @@ Failed: error 101 at offset 4: \ at end of pattern
Failed: error 101 at offset 4: \ at end of pattern
/(a)bc(d)/I
-Capture group count = 2
+Capturing subpattern count = 2
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 4
@@ -969,7 +969,7 @@ Subject length lower bound = 4
Copy substring 5 failed (-49): unknown substring
/(.{20})/I
-Capture group count = 1
+Capturing subpattern count = 1
Subject length lower bound = 20
abcdefghijklmnopqrstuvwxyz
0: abcdefghijklmnopqrst
@@ -984,7 +984,7 @@ Subject length lower bound = 20
1G abcdefghijklmnopqrst (20)
/(.{15})/I
-Capture group count = 1
+Capturing subpattern count = 1
Subject length lower bound = 15
abcdefghijklmnopqrstuvwxyz
0: abcdefghijklmno
@@ -996,7 +996,7 @@ Subject length lower bound = 15
1G abcdefghijklmno (15)
/(.{16})/I
-Capture group count = 1
+Capturing subpattern count = 1
Subject length lower bound = 16
abcdefghijklmnopqrstuvwxyz
0: abcdefghijklmnop
@@ -1010,7 +1010,7 @@ Subject length lower bound = 16
1L abcdefghijklmnop
/^(a|(bc))de(f)/I
-Capture group count = 3
+Capturing subpattern count = 3
Compile options: <none>
Overall options: anchored
Starting code units: a b
@@ -1049,7 +1049,7 @@ Get substring 4 failed (-49): unknown substring
0C adef (4)
/^abc\00def/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -1062,7 +1062,7 @@ Subject length lower bound = 7
/word ((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+
)((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+
)?)?)?)?)?)?)?)?)?otherword/I
-Capture group count = 8
+Capturing subpattern count = 8
Contains explicit CR or LF match
First code unit = 'w'
Last code unit = 'd'
@@ -1076,7 +1076,7 @@ Subject length lower bound = 14
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit at start or follows newline
Last code unit = 'X'
Subject length lower bound = 1
@@ -1089,7 +1089,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: dotall
Overall options: anchored dotall
Last code unit = 'X'
@@ -1108,7 +1108,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit at start or follows newline
Subject length lower bound = 1
@@ -1125,7 +1125,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: dotall
Overall options: anchored dotall
Subject length lower bound = 1
@@ -1143,7 +1143,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
Subject length lower bound = 1
@@ -1161,13 +1161,13 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Subject length lower bound = 1
/\Biss\B/I,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 'i'
Last code unit = 's'
@@ -1177,7 +1177,7 @@ Subject length lower bound = 3
0+ issippi
/iss/I,aftertext,altglobal
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'i'
Last code unit = 's'
Subject length lower bound = 3
@@ -1188,7 +1188,7 @@ Subject length lower bound = 3
0+ ippi
/\Biss\B/I,aftertext,altglobal
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 'i'
Last code unit = 's'
@@ -1198,7 +1198,7 @@ Subject length lower bound = 3
0+ issippi
/\Biss\B/Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 'i'
Last code unit = 's'
@@ -1213,7 +1213,7 @@ Subject length lower bound = 3
No match
/(?<=[Ms])iss/Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 'i'
Last code unit = 's'
@@ -1225,7 +1225,7 @@ Subject length lower bound = 3
0+ ippi
/(?<=[Ms])iss/I,aftertext,altglobal
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 'i'
Last code unit = 's'
@@ -1235,7 +1235,7 @@ Subject length lower bound = 3
0+ issippi
/^iss/Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'i'
@@ -1245,7 +1245,7 @@ Subject length lower bound = 3
0+ issippi
/.*iss/Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
First code unit at start or follows newline
Last code unit = 's'
Subject length lower bound = 3
@@ -1256,7 +1256,7 @@ Subject length lower bound = 3
0+ pqr
/.i./Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'i'
Subject length lower bound = 3
Mississippi
@@ -1285,7 +1285,7 @@ Subject length lower bound = 3
0+ souri river
/^.is/Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Subject length lower bound = 3
@@ -1294,7 +1294,7 @@ Subject length lower bound = 3
0+ sissippi
/^ab\n/Ig,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Compile options: <none>
Overall options: anchored
@@ -1305,7 +1305,7 @@ Subject length lower bound = 3
0+ ab\x0acd
/^ab\n/Igm,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Options: multiline
First code unit at start or follows newline
@@ -1329,238 +1329,238 @@ Subject length lower bound = 3
0+ xyz
/abc/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
/abc|bac/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Last code unit = 'c'
Subject length lower bound = 3
/(abc|bac)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Last code unit = 'c'
Subject length lower bound = 3
/(abc|(c|dc))/I
-Capture group count = 2
+Capturing subpattern count = 2
Starting code units: a c d
Last code unit = 'c'
Subject length lower bound = 1
/(abc|(d|de)c)/I
-Capture group count = 2
+Capturing subpattern count = 2
Starting code units: a d
Last code unit = 'c'
Subject length lower bound = 2
/a*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
/a+/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/(baa|a+)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Last code unit = 'a'
Subject length lower bound = 1
/a{0,3}/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
/baa{3,}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'b'
Last code unit = 'a'
Subject length lower bound = 5
/"([^\\"]+|\\.)*"/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = '"'
Last code unit = '"'
Subject length lower bound = 2
/(abc|ab[cd])/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 3
/(a|.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Subject length lower bound = 1
/a|ba|\w/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
/abc(?=pqr)/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'r'
Subject length lower bound = 3
/...(?<=abc)/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 3
Subject length lower bound = 3
/abc(?!pqr)/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
/ab./I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 3
/ab[xyz]/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 3
/abc*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
/ab.c*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 3
/a.c*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 2
/.c*/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/ac*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/(a.c*|b.c*)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Subject length lower bound = 2
/a.c*|aba/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 2
/.+a/I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'a'
Subject length lower bound = 2
/(?=abcda)a.*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'a'
Subject length lower bound = 1
/(?=a)a.*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/a(b)*/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/a\d*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/ab\d*/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
/a(\d)*/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/abcde{0,0}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 4
/ab\d+/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 3
/a(?(1)b)(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Subject length lower bound = 2
/a(?(1)bag|big)(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Last code unit = 'g'
Subject length lower bound = 5
/a(?(1)bag|big)*(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Subject length lower bound = 2
/a(?(1)bag|big)+(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Last code unit = 'g'
Subject length lower bound = 5
/a(?(1)b..|b..)(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 5
/ab\d{0}e/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'e'
Subject length lower bound = 3
/a?b?/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
a
@@ -1576,7 +1576,7 @@ Subject length lower bound = 0
No match
/|-/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
abcd
@@ -1590,7 +1590,7 @@ Subject length lower bound = 0
No match
/^.?abcd/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Last code unit = 'd'
@@ -1604,7 +1604,7 @@ Subject length lower bound = 4
)* # Zero or more contents
\) # Closing )
/Ix
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1632,7 +1632,7 @@ No match
No match
/\( ( (?>[^()]+) | (?R) )* \) /Igx
-Capture group count = 1
+Capturing subpattern count = 1
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1647,7 +1647,7 @@ Subject length lower bound = 2
1: z
/\( (?: (?>[^()]+) | (?R) ) \) /Ix
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1665,7 +1665,7 @@ Subject length lower bound = 3
No match
/\( (?: (?>[^()]+) | (?R) )? \) /Ix
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1676,7 +1676,7 @@ Subject length lower bound = 2
0: (fsh)
/\( ( (?>[^()]+) | (?R) )* \) /Ix
-Capture group count = 1
+Capturing subpattern count = 1
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1686,7 +1686,7 @@ Subject length lower bound = 2
1: cd
/\( ( ( (?>[^()]+) | (?R) )* ) \) /Ix
-Capture group count = 2
+Capturing subpattern count = 2
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1697,7 +1697,7 @@ Subject length lower bound = 2
2: cd
/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /Ix
-Capture group count = 3
+Capturing subpattern count = 3
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1714,7 +1714,7 @@ Subject length lower bound = 2
3: cd
/\( ( (123)? ( (?>[^()]+) | (?R) )* ) \) /Ix
-Capture group count = 3
+Capturing subpattern count = 3
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1731,7 +1731,7 @@ Subject length lower bound = 2
3: cd
/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /Ix
-Capture group count = 11
+Capturing subpattern count = 11
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1751,7 +1751,7 @@ Subject length lower bound = 2
11: cd
/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /Ix
-Capture group count = 3
+Capturing subpattern count = 3
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1762,7 +1762,7 @@ Subject length lower bound = 2
2: 123
/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /Ix
-Capture group count = 3
+Capturing subpattern count = 3
Options: extended
First code unit = '('
Last code unit = ')'
@@ -1786,7 +1786,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
@@ -1801,7 +1801,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1826,7 +1826,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
@@ -1841,7 +1841,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1860,7 +1860,7 @@ Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
Subject length lower bound = 1
/[_[:alpha:]]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
_ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
@@ -1873,7 +1873,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1892,7 +1892,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a
@@ -1914,7 +1914,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x09 \x20
@@ -1928,7 +1928,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
@@ -1948,7 +1948,7 @@ Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
Subject length lower bound = 1
/[\n\x0b\x0c\x0d[:blank:]]/I
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
Subject length lower bound = 1
@@ -1961,7 +1961,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -1977,7 +1977,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9
@@ -1991,7 +1991,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 :
@@ -2007,7 +2007,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -2021,7 +2021,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8
@@ -2037,7 +2037,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^
@@ -2052,7 +2052,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
@@ -2066,7 +2066,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
@@ -2080,7 +2080,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f
@@ -2094,7 +2094,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
@@ -2109,7 +2109,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8
@@ -2134,7 +2134,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
@@ -2161,7 +2161,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
@@ -2187,7 +2187,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W
X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
@@ -2202,7 +2202,7 @@ Failed: error 113 at offset 1: POSIX collating elements are not supported
Failed: error 130 at offset 3: unknown POSIX class name
/[[:upper:]]/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -2213,7 +2213,7 @@ Subject length lower bound = 1
0: a
/[[:lower:]]/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -2224,7 +2224,7 @@ Subject length lower bound = 1
0: a
/((?-i)[[:lower:]])[[:lower:]]/Ii
-Capture group count = 1
+Capturing subpattern count = 1
Options: caseless
Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 2
@@ -2249,7 +2249,7 @@ Failed: error 115 at offset 5: reference to non-existent subpattern
# This one's here because of the large output vector needed
/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|
$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/I
-Capture group count = 271
+Capturing subpattern count = 271
Max back reference = 270
Starting code units: 0 1 2 3 4 5 6 7 8 9
Subject length lower bound = 0
@@ -2530,7 +2530,7 @@ Subject length lower bound = 0
# This one's here because Perl does this differently and PCRE2 can't at present
/(main(O)?)+/I
-Capture group count = 2
+Capturing subpattern count = 2
First code unit = 'm'
Last code unit = 'n'
Subject length lower bound = 4
@@ -2545,7 +2545,7 @@ Subject length lower bound = 4
# These are all cases where Perl does it differently (nested captures)
/^(a(b)?)+$/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2556,7 +2556,7 @@ Subject length lower bound = 1
2: b
/^(aa(bb)?)+$/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2567,7 +2567,7 @@ Subject length lower bound = 2
2: bb
/^(aa|aa(bb))+$/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2578,7 +2578,7 @@ Subject length lower bound = 2
2: bb
/^(aa(bb)??)+$/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2589,7 +2589,7 @@ Subject length lower bound = 2
2: bb
/^(?:aa(bb)?)+$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2599,7 +2599,7 @@ Subject length lower bound = 2
1: bb
/^(aa(b(b))?)+$/I
-Capture group count = 3
+Capturing subpattern count = 3
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2611,7 +2611,7 @@ Subject length lower bound = 2
3: b
/^(?:aa(b(b))?)+$/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2622,7 +2622,7 @@ Subject length lower bound = 2
2: b
/^(?:aa(b(?:b))?)+$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2632,7 +2632,7 @@ Subject length lower bound = 2
1: bb
/^(?:aa(bb(?:b))?)+$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2642,7 +2642,7 @@ Subject length lower bound = 2
1: bbb
/^(?:aa(b(?:bb))?)+$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2652,7 +2652,7 @@ Subject length lower bound = 2
1: bbb
/^(?:aa(?:b(b))?)+$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2662,7 +2662,7 @@ Subject length lower bound = 2
1: b
/^(?:aa(?:b(bb))?)+$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2672,7 +2672,7 @@ Subject length lower bound = 2
1: bb
/^(aa(b(bb))?)+$/I
-Capture group count = 3
+Capturing subpattern count = 3
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2684,7 +2684,7 @@ Subject length lower bound = 2
3: bb
/^(aa(bb(bb))?)+$/I
-Capture group count = 3
+Capturing subpattern count = 3
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -2703,7 +2703,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -2715,7 +2715,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -2727,7 +2727,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
Subject length lower bound = 1
@@ -2738,7 +2738,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
\x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
@@ -2763,7 +2763,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b' (caseless)
Subject length lower bound = 2
@@ -2785,7 +2785,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b' (caseless)
Subject length lower bound = 2
@@ -2806,7 +2806,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
@@ -2820,7 +2820,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
@@ -2833,7 +2833,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = '1'
Last code unit = '0'
Subject length lower bound = 300
@@ -2845,7 +2845,7 @@ Subject length lower bound = 300
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = '1'
Last code unit = '0'
Subject length lower bound = 300
@@ -2856,7 +2856,7 @@ Subject length lower bound = 300
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
\
@@ -2869,7 +2869,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'x'
Subject length lower bound = 1
@@ -2880,7 +2880,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = ' '
Subject length lower bound = 1
@@ -2891,7 +2891,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
abc
@@ -2908,7 +2908,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -2922,7 +2922,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
@@ -2935,7 +2935,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
@@ -2950,7 +2950,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
@@ -2961,7 +2961,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'x'
Subject length lower bound = 1
@@ -3011,12 +3011,12 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
/^(\w++|\s++)*$/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Compile options: <none>
Overall options: anchored
@@ -3029,7 +3029,7 @@ Subject length lower bound = 0
No match
/(\d++)(\w)/I
-Capture group count = 2
+Capturing subpattern count = 2
Starting code units: 0 1 2 3 4 5 6 7 8 9
Subject length lower bound = 2
12345a
@@ -3041,7 +3041,7 @@ Subject length lower bound = 2
No match
/a++b/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3049,7 +3049,7 @@ Subject length lower bound = 2
0: aaab
/(a++b)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3058,7 +3058,7 @@ Subject length lower bound = 2
1: aaab
/(a++)b/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3067,7 +3067,7 @@ Subject length lower bound = 2
1: aaa
/([^()]++|\([^()]*\))+/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( * + , - . / 0 1 2 3 4 5
@@ -3088,7 +3088,7 @@ Subject length lower bound = 1
1: x
/\(([^()]++|\([^()]+\))+\)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = '('
Last code unit = ')'
Subject length lower bound = 3
@@ -3123,7 +3123,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
@@ -3149,7 +3149,7 @@ Failed: error 109 at offset 7: quantifier does not follow a repeatable item
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'x'
Last code unit = 'b'
Subject length lower bound = 3
@@ -3165,7 +3165,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'x'
Last code unit = 'b'
Subject length lower bound = 3
@@ -3193,7 +3193,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 5
+Capturing subpattern count = 5
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -3209,7 +3209,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'x'
@@ -3228,7 +3228,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'x'
@@ -3282,7 +3282,7 @@ Failed: error 106 at offset 10: missing terminating ] for character class
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
Subject length lower bound = 1
@@ -3293,7 +3293,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
Subject length lower bound = 1
@@ -3304,12 +3304,12 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b c d e
Subject length lower bound = 1
/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/Ix
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = '<'
Last code unit = '>'
@@ -3338,7 +3338,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = '8'
Last code unit = 'X'
@@ -3352,43 +3352,43 @@ Subject length lower bound = 409
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = '$'
Last code unit = 'X'
Subject length lower bound = 404
/(.*)\d+\1/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Subject length lower bound = 1
/(.*)\d+/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit at start or follows newline
Subject length lower bound = 1
/(.*)\d+\1/Is
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Options: dotall
Subject length lower bound = 1
/(.*)\d+/Is
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: dotall
Overall options: anchored dotall
Subject length lower bound = 1
/(.*(xyz))\d+\2/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 2
First code unit at start or follows newline
Last code unit = 'z'
Subject length lower bound = 7
/((.*))\d+\1/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 1
Subject length lower bound = 1
abc123bc
@@ -3397,143 +3397,143 @@ Subject length lower bound = 1
2: bc
/a[b]/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
/(?=a).*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
First code unit = 'a'
Subject length lower bound = 0
/(?=abc).xyz/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
First code unit = 'a' (caseless)
Last code unit = 'z' (caseless)
Subject length lower bound = 4
/(?=abc)(?i).xyz/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'z' (caseless)
Subject length lower bound = 4
/(?=a)(?=b)/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
First code unit = 'a'
Subject length lower bound = 0
/(?=.)a/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/((?=abcda)a)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'a'
Subject length lower bound = 1
/((?=abcda)ab)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
/()a/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/(?:(?=.)|(?<!x))a/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 'a'
Subject length lower bound = 1
/(?(1)ab|ac)(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Subject length lower bound = 3
/(?(1)abz|acz)(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Last code unit = 'z'
Subject length lower bound = 4
/(?(1)abz)(.)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Subject length lower bound = 1
/(?(1)abz)(1)23/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Last code unit = '3'
Subject length lower bound = 3
/(a)+/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/(a){2,3}/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'a'
Subject length lower bound = 2
/(a)*/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
/[a]/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/[ab]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Subject length lower bound = 1
/[ab]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Subject length lower bound = 1
/[^a]/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/\d456/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = '6'
Subject length lower bound = 4
/\d456/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = '6'
Subject length lower bound = 4
/a^b/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
/^a/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
First code unit at start or follows newline
Last code unit = 'a'
@@ -3547,24 +3547,24 @@ Subject length lower bound = 1
No match
/c|abc/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a c
Last code unit = 'c'
Subject length lower bound = 1
/(?i)[ab]/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: A B a b
Subject length lower bound = 1
/[ab](?i)cd/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Last code unit = 'd' (caseless)
Subject length lower bound = 3
/abc(?C)def/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 6
@@ -3585,7 +3585,7 @@ No match
No match
/abc(?C)de(?C1)f/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 6
@@ -3596,7 +3596,7 @@ Subject length lower bound = 6
0: abcdef
/(?C1)\dabc(?C2)def/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = 'f'
Subject length lower bound = 7
@@ -3613,7 +3613,7 @@ Subject length lower bound = 7
No match
/(?C1)\dabc(?C2)def/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = 'f'
Subject length lower bound = 7
@@ -3630,7 +3630,7 @@ Subject length lower bound = 7
No match
/(?C255)ab/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3645,7 +3645,7 @@ Failed: error 182 at offset 3: unrecognized string delimiter follows (?C
Failed: error 139 at offset 5: closing parenthesis for (?C expected
/abc(?C)def/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 6
@@ -3655,7 +3655,7 @@ Subject length lower bound = 6
0: abcdef
/(abc)(?C)de(?C1)f/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 6
@@ -3687,7 +3687,7 @@ Callout 1: last capture = 1
No match
/(?C0)(abc(?C1))*/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
abcabcabc
@@ -3715,7 +3715,7 @@ Subject length lower bound = 0
1: abc
/(\d{3}(?C))*/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
123\=callout_capture
@@ -3750,7 +3750,7 @@ Callout 0: last capture = 1
1: 789
/((xyz)(?C)p|(?C1)xyzabc)/I
-Capture group count = 2
+Capturing subpattern count = 2
First code unit = 'x'
Subject length lower bound = 4
xyzabc\=callout_capture
@@ -3766,7 +3766,7 @@ Callout 1: last capture = 0
1: xyzabc
/(X)((xyz)(?C)p|(?C1)xyzabc)/I
-Capture group count = 3
+Capturing subpattern count = 3
First code unit = 'X'
Last code unit = 'x'
Subject length lower bound = 5
@@ -3786,7 +3786,7 @@ Callout 1: last capture = 1
2: xyzabc
/(?=(abc))(?C)abcdef/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 6
@@ -3799,7 +3799,7 @@ Callout 0: last capture = 1
1: abc
/(?!(abc)(?C1)d)(?C2)abcxyz/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'z'
Subject length lower bound = 6
@@ -3814,7 +3814,7 @@ Callout 2: last capture = 0
0: abcxyz
/(?<=(abc)(?C))xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Max lookbehind = 3
First code unit = 'x'
Last code unit = 'z'
@@ -3828,7 +3828,7 @@ Callout 0: last capture = 1
1: abc
/a(b+)(c*)(?C1)/I
-Capture group count = 2
+Capturing subpattern count = 2
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3840,7 +3840,7 @@ Callout data = 1
No match
/a(b+?)(c*?)(?C1)/I
-Capture group count = 2
+Capturing subpattern count = 2
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3866,25 +3866,25 @@ Callout data = 1
No match
/(?C)abc/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 3
/(?C)^abc/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3
/(?C)a|b/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b
Subject length lower bound = 1
/a|(b)(?C)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Subject length lower bound = 1
b
@@ -3894,7 +3894,7 @@ Subject length lower bound = 1
1: b
/x(ab|(bc|(de|(?R))))/I
-Capture group count = 3
+Capturing subpattern count = 3
First code unit = 'x'
Subject length lower bound = 3
xab
@@ -3924,7 +3924,7 @@ Subject length lower bound = 3
No match
/^([^()]|\((?1)*\))*$/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Compile options: <none>
Overall options: anchored
@@ -3943,7 +3943,7 @@ Subject length lower bound = 0
No match
/^>abc>([^()]|\((?1)*\))*<xyz<$/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = '>'
@@ -3970,7 +3970,7 @@ Subject length lower bound = 10
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -3988,13 +3988,13 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
/^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
Starting code units: ( - 0 1 2 3 4 5 6 7 8 9
@@ -4014,7 +4014,7 @@ Subject length lower bound = 1
No match
/^(x(y|(?1){2})z)/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
First code unit = 'x'
@@ -4034,7 +4034,7 @@ No match
No match
/((< (?: (?(R) \d++ | [^<>]*+) | (?2)) * >))/Ix
-Capture group count = 2
+Capturing subpattern count = 2
Options: extended
First code unit = '<'
Last code unit = '>'
@@ -4074,7 +4074,7 @@ Failed: error 115 at offset 3: reference to non-existent subpattern
Failed: error 114 at offset 10: missing closing parenthesis
/^(abc)def(?1)/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -4084,7 +4084,7 @@ Subject length lower bound = 9
1: abc
/^(a|b|c)=(?1)+/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
Starting code units: a b c
@@ -4100,7 +4100,7 @@ Subject length lower bound = 2
1: a
/^(a|b|c)=((?1))+/I
-Capture group count = 2
+Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
Starting code units: a b c
@@ -4134,8 +4134,8 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
longername2 2
name1 1
First code unit = 'a'
@@ -4168,8 +4168,8 @@ Subject length lower bound = 4
Ket
End
------------------------------------------------------------------
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
a 3
c 1
d 2
@@ -4193,18 +4193,18 @@ Subject length lower bound = 4
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
a 1
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 10
/^\W*(?:(?P<one>(?P<two>.)\W*(?P>one)\W*(?P=two)|)|(?P<three>(?P<four>.)\W*(?P>three)\W*(?P=four)|\W*.\W*))\W*$/Ii
-Capture group count = 4
+Capturing subpattern count = 4
Max back reference = 4
-Named capture groups:
+Named capturing subpatterns:
four 4
one 1
three 3
@@ -4240,7 +4240,7 @@ Subject length lower bound = 0
No match
/((?(R)a|b))\1(?1)?/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Subject length lower bound = 2
bb
@@ -4251,21 +4251,21 @@ Subject length lower bound = 2
1: b
/(.*)a/Is
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: dotall
Overall options: anchored dotall
Last code unit = 'a'
Subject length lower bound = 1
/(.*)a\1/Is
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Options: dotall
Last code unit = 'a'
Subject length lower bound = 1
/(.*)a(b)\2/Is
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 2
Compile options: dotall
Overall options: anchored dotall
@@ -4273,35 +4273,35 @@ Last code unit = 'b'
Subject length lower bound = 3
/((.*)a|(.*)b)z/Is
-Capture group count = 3
+Capturing subpattern count = 3
Compile options: dotall
Overall options: anchored dotall
Last code unit = 'z'
Subject length lower bound = 2
/((.*)a|(.*)b)z\1/Is
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 1
Options: dotall
Last code unit = 'z'
Subject length lower bound = 3
/((.*)a|(.*)b)z\2/Is
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 2
Options: dotall
Last code unit = 'z'
Subject length lower bound = 2
/((.*)a|(.*)b)z\3/Is
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 3
Options: dotall
Last code unit = 'z'
Subject length lower bound = 2
/((.*)a|^(.*)b)z\3/Is
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 3
Compile options: dotall
Overall options: anchored dotall
@@ -4309,21 +4309,21 @@ Last code unit = 'z'
Subject length lower bound = 2
/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is
-Capture group count = 31
+Capturing subpattern count = 31
May match empty string
Compile options: dotall
Overall options: anchored dotall
Subject length lower bound = 0
/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is
-Capture group count = 31
+Capturing subpattern count = 31
Max back reference = 31
May match empty string
Options: dotall
Subject length lower bound = 0
/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is
-Capture group count = 32
+Capturing subpattern count = 32
Max back reference = 32
May match empty string
Options: dotall
@@ -4341,7 +4341,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: no_auto_capture
First code unit = 'a'
Last code unit = 'c'
@@ -4361,8 +4361,8 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
one 1
Options: no_auto_capture
First code unit = 'a'
@@ -4384,8 +4384,8 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
named 1
Options: no_auto_capture
First code unit = 'a'
@@ -4393,7 +4393,7 @@ Last code unit = 'c'
Subject length lower bound = 3
/(aaa(?C1)bbb|ab)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 2
@@ -4421,8 +4421,8 @@ Callout data = -1
No match
/ab(?P<one>cd)ef(?P<two>gh)/I
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
one 1
two 2
First code unit = 'a'
@@ -4461,8 +4461,8 @@ Copy substring 'three' failed (-49): unknown substring
Ket
End
------------------------------------------------------------------
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
Tes 1
Test 2
May match empty string
@@ -4478,16 +4478,16 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
Tes 2
Test 1
May match empty string
Subject length lower bound = 0
/(?P<Z>zz)(?P<A>aa)/I
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
A 2
Z 1
First code unit = 'z'
@@ -4511,8 +4511,8 @@ Failed: error 143 at offset 16: two named subpatterns have the same name (PCRE2_
Failed: error 143 at offset 31: two named subpatterns have the same name (PCRE2_DUPNAMES not set)
"\[((?P<elem>\d+)(,(?P>elem))*)\]"I
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
elem 2
First code unit = '['
Last code unit = ']'
@@ -4527,8 +4527,8 @@ Subject length lower bound = 3
No match
"\[((?P<elem>\d+)(,(?P>elem))*)?\]"I
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
elem 2
First code unit = '['
Last code unit = ']'
@@ -4556,7 +4556,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Subject length lower bound = 0
@@ -4575,7 +4575,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Subject length lower bound = 0
@@ -4605,7 +4605,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Subject length lower bound = 0
@@ -4638,7 +4638,7 @@ Subject length lower bound = 0
------------------------------------------------------------------
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
-Capture group count = 3
+Capturing subpattern count = 3
Options: caseless
Last code unit = 'g' (caseless)
Subject length lower bound = 8
@@ -4647,7 +4647,7 @@ Subject length lower bound = 8
1: Baby Bjorn Active Carrier - With free SHIPPING!!
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
-Capture group count = 3
+Capturing subpattern count = 3
Options: caseless
Last code unit = 'g' (caseless)
Subject length lower bound = 8
@@ -4664,7 +4664,7 @@ Subject length lower bound = 8
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'b'
Subject length lower bound = 1
@@ -4682,7 +4682,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Last code unit = 'c'
Subject length lower bound = 1
@@ -4697,7 +4697,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 6
@@ -4719,7 +4719,7 @@ Subject length lower bound = 6
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: auto_callout
First code unit = 'a'
Last code unit = 'e'
@@ -4754,7 +4754,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: auto_callout
Starting code units: a b
Last code unit = 'b'
@@ -4797,7 +4797,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: auto_callout
Starting code units: a b
Last code unit = 'b'
@@ -4840,7 +4840,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: auto_callout
First code unit = 'a'
Last code unit = 'b'
@@ -4897,7 +4897,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: auto_callout
Starting code units: a d
Last code unit = 'x'
@@ -4971,7 +4971,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: auto_callout
Starting code units: a d
Last code unit = 'x'
@@ -5019,7 +5019,7 @@ Subject length lower bound = 4
No match
/(ab|cd){3,4}/I,auto_callout
-Capture group count = 1
+Capturing subpattern count = 1
Options: auto_callout
Starting code units: a c
Subject length lower bound = 6
@@ -5111,7 +5111,7 @@ Subject length lower bound = 6
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: auto_callout
Starting code units: a b x
Subject length lower bound = 2
@@ -5161,7 +5161,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: auto_callout
Starting code units: a b x
Subject length lower bound = 2
@@ -5262,7 +5262,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: auto_callout
Starting code units: a b x
Last code unit = '3'
@@ -5294,7 +5294,7 @@ Subject length lower bound = 11
1: aac
/\b.*/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
May match empty string
Subject length lower bound = 0
@@ -5302,7 +5302,7 @@ Subject length lower bound = 0
0: cd
/\b.*/Is
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
May match empty string
Options: dotall
@@ -5311,14 +5311,14 @@ Subject length lower bound = 0
0: cd
/(?!.bcd).*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
Xbcd12345
0: bcd12345
/abcde/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'e'
Subject length lower bound = 5
@@ -5337,7 +5337,7 @@ Partial match: abc
No match
"^(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/(20)?\d\d$"I
-Capture group count = 3
+Capturing subpattern count = 3
Compile options: <none>
Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9
@@ -5393,31 +5393,31 @@ No match
No match
/0{0,2}ABC/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 A
Last code unit = 'C'
Subject length lower bound = 3
/\d{3,}ABC/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = 'C'
Subject length lower bound = 6
/\d*ABC/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A
Last code unit = 'C'
Subject length lower bound = 3
/[abc]+DE/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c
Last code unit = 'E'
Subject length lower bound = 3
/[abc]?123/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 1 a b c
Last code unit = '3'
Subject length lower bound = 3
@@ -5435,7 +5435,7 @@ Partial match: c12
0: c123
/^(?:\d){3,5}X/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9
@@ -5462,7 +5462,7 @@ No match
No match
"<(\w+)/?>(.)*</(\1)>"Igms
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 1
Options: dotall multiline
First code unit = '<'
@@ -5475,7 +5475,7 @@ Subject length lower bound = 7
3: seite
/line\nbreak/I
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
First code unit = 'l'
Last code unit = 'k'
@@ -5486,7 +5486,7 @@ Subject length lower bound = 10
0: line\x0abreak
/line\nbreak/I,firstline
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Options: firstline
First code unit = 'l'
@@ -5499,7 +5499,7 @@ Subject length lower bound = 10
No match
/line\nbreak/Im,firstline
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Options: firstline multiline
First code unit = 'l'
@@ -5512,7 +5512,7 @@ Subject length lower bound = 10
No match
/(?i)(?-i)AbCd/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'A'
Last code unit = 'd'
Subject length lower bound = 4
@@ -5532,7 +5532,7 @@ Failed: error 105 at offset 9: number too big in {} quantifier
Failed: error 105 at offset 11: number too big in {} quantifier
"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a' (caseless)
Last code unit = 'B'
@@ -5542,9 +5542,9 @@ Subject length lower bound = 15
1: k
"(?P<n0>a)(?P<n1>b)(?P<n2>c)(?P<n3>d)(?P<n4>e)(?P<n5>f)(?P<n6>g)(?P<n7>h)(?P<n8>i)(?P<n9>j)(?P<n10>k)(?P<n11>l)A\11B"I
-Capture group count = 12
+Capturing subpattern count = 12
Max back reference = 11
-Named capture groups:
+Named capturing subpatterns:
n0 1
n1 2
n10 11
@@ -5576,7 +5576,7 @@ Subject length lower bound = 15
12: l
"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"I
-Capture group count = 12
+Capturing subpattern count = 12
Max back reference = 11
First code unit = 'a'
Last code unit = 'B'
@@ -5597,8 +5597,8 @@ Subject length lower bound = 15
12: l
"(?P<name0>a)(?P<name1>a)(?P<name2>a)(?P<name3>a)(?P<name4>a)(?P<name5>a)(?P<name6>a)(?P<name7>a)(?P<name8>a)(?P<name9>a)(?P<name10>a)(?P<name11>a)(?P<name12>a)(?P<name13>a)(?P<name14>a)(?P<name15>a)(?P<name16>a)(?P<name17>a)(?P<name18>a)(?P<name19>a)(?P<name20>a)(?P<name21>a)(?P<name22>a)(?P<name23>a)(?P<name24>a)(?P<name25>a)(?P<name26>a)(?P<name27>a)(?P<name28>a)(?P<name29>a)(?P<name30>a)(?P<name31>a)(?P<name32>a)(?P<name33>a)(?P<name34>a)(?P<name35>a)(?P<name36>a)(?P<name37>a)(?P<name38>a)(?P<name39>a)(?P<name40>a)(?P<name41>a)(?P<name42>a)(?P<name43>a)(?P<name44>a)(?P<name45>a)(?P<name46>a)(?P<name47>a)(?P<name48>a)(?P<name49>a)(?P<name50>a)(?P<name51>a)(?P<name52>a)(?P<name53>a)(?P<name54>a)(?P<name55>a)(?P<name56>a)(?P<name57>a)(?P<name58>a)(?P<name59>a)(?P<name60>a)(?P<name61>a)(?P<name62>a)(?P<name63>a)(?P<name64>a)(?P<name65>a)(?P<name66>a)(?P<name67>a)(?P<name68>a)(?P<name69>a)(?P<name70>a)(?P<name71>a)(?P<name72>a)(?P<name73>a)(?P<name74>a)(?P<name75>a)(?P<name76>a)(?P<name77>a)(?P<name78>a)(?P<name79>a)(?P<name80>a)(?P<name81>a)(?P<name82>a)(?P<name83>a)(?P<name84>a)(?P<name85>a)(?P<name86>a)(?P<name87>a)(?P<name88>a)(?P<name89>a)(?P<name90>a)(?P<name91>a)(?P<name92>a)(?P<name93>a)(?P<name94>a)(?P<name95>a)(?P<name96>a)(?P<name97>a)(?P<name98>a)(?P<name99>a)(?P<name100>a)"I
-Capture group count = 101
-Named capture groups:
+Capturing subpattern count = 101
+Named capturing subpatterns:
name0 1
name1 2
name10 11
@@ -5722,7 +5722,7 @@ Matched, but too many substrings
14: a
"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"I
-Capture group count = 101
+Capturing subpattern count = 101
First code unit = 'a'
Last code unit = 'a'
Subject length lower bound = 101
@@ -5745,7 +5745,7 @@ Matched, but too many substrings
14: a
/[^()]*(?:\((?R)\)[^()]*)*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
(this(and)that
@@ -5756,7 +5756,7 @@ Subject length lower bound = 0
0: (this(and)that)stuff
/[^()]*(?:\((?>(?R))\)[^()]*)*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
(this(and)that
@@ -5765,7 +5765,7 @@ Subject length lower bound = 0
0: (this(and)that)
/[^()]*(?:\((?R)\))*[^()]*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
(this(and)that
@@ -5774,7 +5774,7 @@ Subject length lower bound = 0
0: (this(and)that)
/(?:\((?R)\))*[^()]*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
(this(and)that
@@ -5785,7 +5785,7 @@ Subject length lower bound = 0
0: ((this))
/(?:\((?R)\))|[^()]*/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
(this(and)that
@@ -5798,7 +5798,7 @@ Subject length lower bound = 0
0: ((this))
/\x{0000ff}/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \xff
Subject length lower bound = 1
@@ -5806,8 +5806,8 @@ Subject length lower bound = 1
Failed: error 143 at offset 18: two named subpatterns have the same name (PCRE2_DUPNAMES not set)
/^((?P<A>a1)|(?P<A>a2)b)/I,dupnames
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
A 2
A 3
Compile options: dupnames
@@ -5834,8 +5834,8 @@ Copy substring 'Z' failed (-49): unknown substring
C a1 (2) A (non-unique)
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
a 1
a 3
b 2
@@ -5844,8 +5844,8 @@ Options: dupnames
Subject length lower bound = 0
/^(?P<A>a)(?P<A>b)/I,dupnames
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
A 1
A 2
Compile options: dupnames
@@ -5859,8 +5859,8 @@ Subject length lower bound = 2
C a (1) A (non-unique)
/^(?P<A>a)(?P<A>b)|cd/I,dupnames
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
A 1
A 2
Options: dupnames
@@ -5876,8 +5876,8 @@ Subject length lower bound = 2
Copy substring 'A' failed (-55): requested value is not set
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
-Capture group count = 4
-Named capture groups:
+Capturing subpattern count = 4
+Named capturing subpatterns:
A 1
A 2
A 3
@@ -5894,8 +5894,8 @@ Subject length lower bound = 2
C ef (2) A (non-unique)
/^((?P<A>a1)|(?P<A>a2)b)/I,dupnames
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
A 2
A 3
Compile options: dupnames
@@ -5922,8 +5922,8 @@ Get substring 'Z' failed (-49): unknown substring
G a1 (2) A (non-unique)
/^(?P<A>a)(?P<A>b)/I,dupnames
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
A 1
A 2
Compile options: dupnames
@@ -5937,8 +5937,8 @@ Subject length lower bound = 2
G a (1) A (non-unique)
/^(?P<A>a)(?P<A>b)|cd/I,dupnames
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
A 1
A 2
Options: dupnames
@@ -5954,8 +5954,8 @@ Subject length lower bound = 2
Get substring 'A' failed (-55): requested value is not set
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
-Capture group count = 4
-Named capture groups:
+Capturing subpattern count = 4
+Named capturing subpatterns:
A 1
A 2
A 3
@@ -5972,8 +5972,8 @@ Subject length lower bound = 2
G ef (2) A (non-unique)
/(?J)^((?P<A>a1)|(?P<A>a2)b)/I
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
A 2
A 3
Compile options: <none>
@@ -6001,8 +6001,8 @@ Failed: error 143 at offset 38: two named subpatterns have the same name (PCRE2_
# a random value.
/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<C>d)/I
-Capture group count = 4
-Named capture groups:
+Capturing subpattern count = 4
+Named capturing subpatterns:
A 1
B 2
B 3
@@ -6023,9 +6023,9 @@ Subject length lower bound = 6
C d (1) C (group 4)
/^(?P<A>a)?(?(A)a|b)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
A 1
Compile options: <none>
Overall options: anchored
@@ -6040,9 +6040,9 @@ Subject length lower bound = 1
No match
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
ZZ 1
Last code unit = 'X'
Subject length lower bound = 2
@@ -6057,9 +6057,9 @@ Failed: error 124 at offset 7: missing closing parenthesis for condition
Failed: error 115 at offset 6: reference to non-existent subpattern
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
ZZ 1
Last code unit = 'X'
Subject length lower bound = 3
@@ -6068,9 +6068,9 @@ Subject length lower bound = 3
1: X
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
ZZ 1
Last code unit = 'X'
Subject length lower bound = 3
@@ -6079,12 +6079,12 @@ Subject length lower bound = 3
1: X
/(?P<ABC/I
-Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
/(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
A 1
Subject length lower bound = 2
bXXaYYaY
@@ -6095,9 +6095,9 @@ Subject length lower bound = 2
1: X
/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/I
-Capture group count = 10
+Capturing subpattern count = 10
Max back reference = 10
-Named capture groups:
+Named capturing subpatterns:
A 10
Subject length lower bound = 2
bXXaYYaY
@@ -6114,7 +6114,7 @@ Subject length lower bound = 2
10: Y
/\s*,\s*/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 ,
Last code unit = ','
Subject length lower bound = 1
@@ -6124,7 +6124,7 @@ Subject length lower bound = 1
0: \x0c,\x0d
/^abc/Im,newline=lf
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Forced newline is LF
First code unit at start or follows newline
@@ -6147,7 +6147,7 @@ No match
No match
/^abc/Im,newline=crlf
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Forced newline is CRLF
First code unit at start or follows newline
@@ -6162,7 +6162,7 @@ No match
No match
/^abc/Im,newline=cr
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Forced newline is CR
First code unit at start or follows newline
@@ -6180,7 +6180,7 @@ No match
** Invalid value in 'newline=bad'
/.*/I,newline=lf
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Forced newline is LF
First code unit at start or follows newline
@@ -6193,7 +6193,7 @@ Subject length lower bound = 0
0: abc\x0d
/.*/I,newline=cr
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Forced newline is CR
First code unit at start or follows newline
@@ -6206,7 +6206,7 @@ Subject length lower bound = 0
0: abc
/.*/I,newline=crlf
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Forced newline is CRLF
First code unit at start or follows newline
@@ -6219,7 +6219,7 @@ Subject length lower bound = 0
0: abc
/\w+(.)(.)?def/Is
-Capture group count = 2
+Capturing subpattern count = 2
Options: dotall
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -6237,8 +6237,8 @@ Subject length lower bound = 5
2: \x0a
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
B 1
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = '.'
@@ -6250,7 +6250,7 @@ Subject length lower bound = 7
()()()()()()()()()()()()()()()()()()()()
()()()()()()()()()()()()()()()()()()()()
(.(.))/Ix
-Capture group count = 102
+Capturing subpattern count = 102
Options: extended
Subject length lower bound = 2
XY\=ovector=133
@@ -6359,42 +6359,42 @@ Subject length lower bound = 2
102: Y
/(a*b|(?i:c*(?-i)d))/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: C a b c d
Subject length lower bound = 1
/()[ab]xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Last code unit = 'z'
Subject length lower bound = 4
/(|)[ab]xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Last code unit = 'z'
Subject length lower bound = 4
/(|c)[ab]xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c
Last code unit = 'z'
Subject length lower bound = 4
/(|c?)[ab]xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c
Last code unit = 'z'
Subject length lower bound = 4
/(d?|c?)[ab]xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c d
Last code unit = 'z'
Subject length lower bound = 4
/(d?|c)[ab]xyz/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c d
Last code unit = 'z'
Subject length lower bound = 4
@@ -6409,7 +6409,7 @@ Subject length lower bound = 4
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: a b
@@ -6426,7 +6426,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: a b
@@ -6443,7 +6443,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Starting code units: a b
@@ -6460,7 +6460,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
@@ -6482,7 +6482,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: caseless
Overall options: anchored caseless
Starting code units: A a
@@ -6494,22 +6494,22 @@ Subject length lower bound = 2
0: aaaa5
/(a*|b*)[cd]/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c d
Subject length lower bound = 1
/(a+|b*)[cd]/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c d
Subject length lower bound = 1
/(a*|b+)[cd]/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c d
Subject length lower bound = 1
/(a+|b+)[cd]/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Subject length lower bound = 2
@@ -6521,7 +6521,7 @@ Subject length lower bound = 2
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
)))
/Ix
-Capture group count = 203
+Capturing subpattern count = 203
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -7138,8 +7138,8 @@ Matched, but too many substrings
------------------------------------------------------------------
/(?P<abc>x)(?P<xyz>y)/I
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
abc 1
xyz 2
First code unit = 'x'
@@ -7153,8 +7153,8 @@ Subject length lower bound = 2
C y (1) xyz (group 2)
/(?<abc>x)(?'xyz'y)/I
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
abc 1
xyz 2
First code unit = 'x'
@@ -7168,10 +7168,10 @@ Subject length lower bound = 2
C y (1) xyz (group 2)
/(?<abc'x)(?'xyz'y)/I
-Failed: error 142 at offset 6: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 6: syntax error in subpattern name (missing terminator)
/(?<abc>x)(?'xyz>y)/I
-Failed: error 142 at offset 15: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 15: syntax error in subpattern name (missing terminator)
/(?P'abc'x)(?P<xyz>y)/I
Failed: error 141 at offset 3: unrecognized character after (?P
@@ -7364,9 +7364,9 @@ No match
------------------------------------------------------------------
/^\W*(?:(?<one>(?<two>.)\W*(?&one)\W*\k<two>|)|(?<three>(?<four>.)\W*(?&three)\W*\k'four'|\W*.\W*))\W*$/Ii
-Capture group count = 4
+Capturing subpattern count = 4
Max back reference = 4
-Named capture groups:
+Named capturing subpatterns:
four 4
one 1
three 3
@@ -7402,7 +7402,7 @@ Subject length lower bound = 0
No match
/(?=(\w+))\1:/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -7413,9 +7413,9 @@ Subject length lower bound = 2
1: abcd
/(?=(?'abc'\w+))\k<abc>:/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
abc 1
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -7450,7 +7450,7 @@ No match
No match
/(?(<bc))/
-Failed: error 142 at offset 6: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 6: syntax error in subpattern name (missing terminator)
/(?(''))/
Failed: error 162 at offset 4: subpattern name expected
@@ -7474,10 +7474,10 @@ Failed: error 115 at offset 4: reference to non-existent subpattern
Failed: error 115 at offset 27: reference to non-existent subpattern
/^(?(DEFINE) abc | xyz ) /x
-Failed: error 154 at offset 4: DEFINE subpattern contains more than one branch
+Failed: error 154 at offset 4: DEFINE group contains more than one branch
/(?(DEFINE) abc) xyz/Ix
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'x'
Last code unit = 'z'
@@ -7621,8 +7621,8 @@ No match
No match
/(?&abc)X(?<abc>P)/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
abc 1
Last code unit = 'P'
Subject length lower bound = 3
@@ -7631,8 +7631,8 @@ Subject length lower bound = 3
1: P
/(?1)X(?<abc>P)/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
abc 1
Last code unit = 'P'
Subject length lower bound = 3
@@ -7933,7 +7933,7 @@ Failed: error 115 at offset 6: reference to non-existent subpattern
No match
/\g{A/
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator)
/(?|(abc)|(xyz))/B
------------------------------------------------------------------
@@ -8390,10 +8390,10 @@ No match
Failed: error 166 at offset 7: (*MARK) must have an argument
/(?i:A{1,}\6666666666)/
-Failed: error 161 at offset 19: subpattern number is too big
+Failed: error 161 at offset 19: group number is too big
/\g6666666666/
-Failed: error 161 at offset 7: subpattern number is too big
+Failed: error 161 at offset 7: group number is too big
/[\g6666666666]/B
------------------------------------------------------------------
@@ -8524,7 +8524,7 @@ No match
No match
/a\Rb/I,bsr=anycrlf
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -8542,7 +8542,7 @@ No match
No match
/a\Rb/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -8559,7 +8559,7 @@ Subject length lower bound = 3
0: a\x0bb
/a\R?b/I,bsr=anycrlf
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -8577,7 +8577,7 @@ No match
No match
/a\R?b/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -8594,7 +8594,7 @@ Subject length lower bound = 2
0: a\x0bb
/a\R{2,4}b/I,bsr=anycrlf
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -8612,7 +8612,7 @@ No match
No match
/a\R{2,4}b/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -8632,7 +8632,7 @@ Subject length lower bound = 4
No match
/(*BSR_ANYCRLF)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -8643,7 +8643,7 @@ Subject length lower bound = 3
0: a\x0db
/(*BSR_UNICODE)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -8652,7 +8652,7 @@ Subject length lower bound = 3
0: a\x85b
/(*BSR_ANYCRLF)(*CRLF)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
Forced newline is CRLF
First code unit = 'a'
@@ -8664,7 +8664,7 @@ Subject length lower bound = 3
0: a\x0db
/(*CRLF)(*BSR_UNICODE)a\Rb/I
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
Forced newline is CRLF
First code unit = 'a'
@@ -8674,7 +8674,7 @@ Subject length lower bound = 3
0: a\x85b
/(*CRLF)(*BSR_ANYCRLF)(*CR)ab/I
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
Forced newline is CR
First code unit = 'a'
@@ -8774,7 +8774,7 @@ No match
cat
No match
-/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames
+/(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames
cat
0: a
1:
@@ -8785,7 +8785,7 @@ No match
The ACTA] comes
0: TA]
-/TA]/allow_empty_class,match_unset_backref,dupnames
+/TA]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
The ACTA] comes
0: TA]
@@ -8833,22 +8833,22 @@ Failed: error 106 at offset 4: missing terminating ] for character class
/a[^]b/
Failed: error 106 at offset 5: missing terminating ] for character class
-/a[]b/allow_empty_class,match_unset_backref,dupnames
+/a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
\= Expect no match
ab
No match
-/a[]+b/allow_empty_class,match_unset_backref,dupnames
+/a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
\= Expect no match
ab
No match
-/a[]*+b/allow_empty_class,match_unset_backref,dupnames
+/a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
\= Expect no match
ab
No match
-/a[^]b/allow_empty_class,match_unset_backref,dupnames
+/a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
aXb
0: aXb
a\nb
@@ -8857,7 +8857,7 @@ No match
ab
No match
-/a[^]+b/allow_empty_class,match_unset_backref,dupnames
+/a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames
aXb
0: aXb
a\nX\nXb
@@ -8893,12 +8893,12 @@ No match
Failed: error 109 at offset 8: quantifier does not follow a repeatable item
/(abc|pqr|123){0}[xyz]/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: x y z
Subject length lower bound = 1
/(?(?=.*b)b|^)/I,auto_callout
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: auto_callout
Subject length lower bound = 0
@@ -8938,11 +8938,11 @@ Subject length lower bound = 0
0: b
/(?(?=b).*b|^d)/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/(?(?=.*b).*b|^d)/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/xyz/auto_callout
@@ -9680,48 +9680,48 @@ Failed: error 115 at offset 8: reference to non-existent subpattern
Failed: error 125 at offset 0: lookbehind assertion is not fixed length
/(a|bc)\1/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Starting code units: a b
Subject length lower bound = 2
/(a|bc)\1{2,3}/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Starting code units: a b
Subject length lower bound = 3
/(a|bc)(?1)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Subject length lower bound = 2
/(a|b\1)(a|b\1)/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 1
Starting code units: a b
Subject length lower bound = 2
/(a|b\1){2}/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Starting code units: a b
Subject length lower bound = 2
/(a|bbbb\1)(a|bbbb\1)/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 1
Starting code units: a b
Subject length lower bound = 2
/(a|bbbb\1){2}/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Starting code units: a b
Subject length lower bound = 2
/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'F'
@@ -9729,31 +9729,31 @@ Last code unit = ':'
Subject length lower bound = 22
/<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/Iis
-Capture group count = 11
+Capturing subpattern count = 11
Options: caseless dotall
First code unit = '<'
Last code unit = '>'
Subject length lower bound = 47
"(?>.*/)foo"I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'o'
Subject length lower bound = 4
/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /Ix
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
Last code unit = '-'
Subject length lower bound = 8
/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/Ii
-Capture group count = 1
+Capturing subpattern count = 1
Options: caseless
Starting code units: A B C a b c
Subject length lower bound = 1
/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: c d
Last code unit = 'b'
Subject length lower bound = 41
@@ -9763,7 +9763,7 @@ Subject length lower bound = 41
(?(1) (.*?)\1 | ([^\s]+)) # if quote found, match up to next matching
# quote, otherwise match up to next space
/Iisx
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 1
Options: caseless dotall extended
First code unit = '<'
@@ -9779,7 +9779,7 @@ Subject length lower bound = 9
[0-9a-f]{1,4} $ # final hex number at end of string
(?(1)|.) # check that there was an empty component
/Iix
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Compile options: caseless extended
Overall options: anchored caseless extended
@@ -9787,8 +9787,8 @@ Last code unit = ':'
Subject length lower bound = 2
/(?|(?<a>A)|(?<a>B))/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
a 1
Starting code units: A B
Subject length lower bound = 1
@@ -9807,9 +9807,9 @@ Failed: error 165 at offset 16: different names for subpatterns of the same numb
/(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) |
b(?<quote> (?<apostrophe>')|(?<realquote>")) )
(?('quote')[a-z]+|[0-9]+)/Ix,dupnames
-Capture group count = 6
+Capturing subpattern count = 6
Max back reference = 4
-Named capture groups:
+Named capturing subpatterns:
apostrophe 2
apostrophe 5
quote 1
@@ -9869,9 +9869,9 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 4
+Capturing subpattern count = 4
Max back reference = 4
-Named capture groups:
+Named capturing subpatterns:
D 4
D 1
Compile options: dupnames extended
@@ -9917,9 +9917,9 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 4
+Capturing subpattern count = 4
Max back reference = 4
-Named capture groups:
+Named capturing subpatterns:
A 1
A 4
Options: dupnames extended
@@ -9971,17 +9971,17 @@ Partial match: abca
xxxxabcde\=ph
Partial match: abcde
-/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames
+/(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames
cat
0: a
1:
2:
3: a
-/(\3)(\1)(a)/I,allow_empty_class,match_unset_backref,dupnames
-Capture group count = 3
+/(\3)(\1)(a)/I,alt_bsux,allow_empty_class,match_unset_backref,dupnames
+Capturing subpattern count = 3
Max back reference = 3
-Options: allow_empty_class dupnames match_unset_backref
+Options: alt_bsux allow_empty_class dupnames match_unset_backref
Last code unit = 'a'
Subject length lower bound = 1
cat
@@ -9991,7 +9991,7 @@ Subject length lower bound = 1
3: a
/(\3)(\1)(a)/I
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 3
Last code unit = 'a'
Subject length lower bound = 3
@@ -10000,8 +10000,8 @@ Subject length lower bound = 3
No match
/i(?(DEFINE)(?<s>a))/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
s 1
First code unit = 'i'
Subject length lower bound = 1
@@ -10009,7 +10009,7 @@ Subject length lower bound = 1
0: i
/()i(?(1)a)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'i'
Subject length lower bound = 1
@@ -10507,7 +10507,7 @@ Failed: error 168 at offset 3: \c must be followed by a printable ASCII characte
------------------------------------------------------------------
/(?P<abn>(?P=abn)(?<badstufxxx)/B
-Failed: error 142 at offset 29: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 29: syntax error in subpattern name (missing terminator)
/(?P<abn>(?P=axn)xxx)/B
Failed: error 115 at offset 12: reference to non-existent subpattern
@@ -10619,13 +10619,13 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = '4'
Subject length lower bound = 5
/([abc])++1234/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b c
Last code unit = '4'
Subject length lower bound = 5
@@ -10634,149 +10634,149 @@ Subject length lower bound = 5
Failed: error 125 at offset 0: lookbehind assertion is not fixed length
/(^ab)/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2
/(^ab)++/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2
/(^ab|^)+/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Compile options: <none>
Overall options: anchored
Subject length lower bound = 0
/(^ab|^)++/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Compile options: <none>
Overall options: anchored
Subject length lower bound = 0
/(?:^ab)/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2
/(?:^ab)++/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2
/(?:^ab|^)+/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Compile options: <none>
Overall options: anchored
Subject length lower bound = 0
/(?:^ab|^)++/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Compile options: <none>
Overall options: anchored
Subject length lower bound = 0
/(.*ab)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit at start or follows newline
Last code unit = 'b'
Subject length lower bound = 2
/(.*ab)++/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit at start or follows newline
Last code unit = 'b'
Subject length lower bound = 2
/(.*ab|.*)+/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
First code unit at start or follows newline
Subject length lower bound = 0
/(.*ab|.*)++/I
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
First code unit at start or follows newline
Subject length lower bound = 0
/(?:.*ab)/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit at start or follows newline
Last code unit = 'b'
Subject length lower bound = 2
/(?:.*ab)++/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit at start or follows newline
Last code unit = 'b'
Subject length lower bound = 2
/(?:.*ab|.*)+/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
First code unit at start or follows newline
Subject length lower bound = 0
/(?:.*ab|.*)++/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
First code unit at start or follows newline
Subject length lower bound = 0
/(?=a)[bcd]/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Subject length lower bound = 1
/((?=a))[bcd]/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/((?=a))+[bcd]/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/((?=a))++[bcd]/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Subject length lower bound = 1
/(?=a+)[bcd]/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
First code unit = 'a' (caseless)
Subject length lower bound = 1
/(?=a+?)[bcd]/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
First code unit = 'a' (caseless)
Subject length lower bound = 1
/(?=a++)[bcd]/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
First code unit = 'a' (caseless)
Subject length lower bound = 1
/(?=a{3})[bcd]/Ii
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless
First code unit = 'a' (caseless)
Last code unit = 'a' (caseless)
@@ -10922,22 +10922,22 @@ Matched, but too many substrings
3: b
/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Subject length lower bound = 0
/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I
-Capture group count = 2
+Capturing subpattern count = 2
Starting code units: a b
Subject length lower bound = 3
/(a(?2)|b)(b(?1)|a)(?1)(?2)/I
-Capture group count = 2
+Capturing subpattern count = 2
Starting code units: a b
Subject length lower bound = 4
/(abc)(?1)/I
-Capture group count = 1
+Capturing subpattern count = 1
First code unit = 'a'
Last code unit = 'c'
Subject length lower bound = 6
@@ -11206,20 +11206,20 @@ Latest Mark: B
No match
/\btype\b\W*?\btext\b\W*?\bjavascript\b/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
First code unit = 't'
Last code unit = 't'
Subject length lower bound = 18
/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|<input\b.*?\btype\b\W*?\bimage\b|\bonkeyup\b\W*?\=/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
Starting code units: < o t u
Subject length lower bound = 8
/a(*SKIP)c|b(*ACCEPT)|/I,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
a
@@ -11227,7 +11227,7 @@ Subject length lower bound = 0
0+
/a(*SKIP)c|b(*ACCEPT)cd(*ACCEPT)|x/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b x
Subject length lower bound = 0
ax
@@ -11245,14 +11245,14 @@ Subject length lower bound = 0
No match
/(*ACCEPT)a/I,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 0
bax
0:
0+ bax
/z(*ACCEPT)a/I,aftertext
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'z'
Subject length lower bound = 0
baxzbx
@@ -11364,17 +11364,6 @@ No match
\= Expect no match
aAz
No match
-
-/^\u{7a}/alt_bsux
- u{7a}
- 0: u{7a}
-\= Expect no match
- zoo
-No match
-
-/^\u{7a}/extra_alt_bsux
- zoo
- 0: z
/(?(?=c)c|d)++Y/B
------------------------------------------------------------------
@@ -11766,96 +11755,96 @@ Matched, but too many substrings
# settings of the anchored and startline bits.
/(?>.*?a)(?<=ba)/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 2
Last code unit = 'a'
Subject length lower bound = 1
/(?:.*?a)(?<=ba)/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 2
First code unit at start or follows newline
Last code unit = 'a'
Subject length lower bound = 1
/.*?a(*PRUNE)b/I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'b'
Subject length lower bound = 2
/.*?a(*PRUNE)b/Is
-Capture group count = 0
+Capturing subpattern count = 0
Options: dotall
Last code unit = 'b'
Subject length lower bound = 2
/^a(*PRUNE)b/Is
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: dotall
Overall options: anchored dotall
First code unit = 'a'
Subject length lower bound = 2
/.*?a(*SKIP)b/I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'b'
Subject length lower bound = 2
/(?>.*?a)b/Is
-Capture group count = 0
+Capturing subpattern count = 0
Options: dotall
Last code unit = 'b'
Subject length lower bound = 2
/(?>.*?a)b/I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'b'
Subject length lower bound = 2
/(?>^a)b/Is
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: dotall
Overall options: anchored dotall
First code unit = 'a'
Subject length lower bound = 2
/(?>.*?)(?<=(abcd)|(wxyz))/I
-Capture group count = 2
+Capturing subpattern count = 2
Max lookbehind = 4
May match empty string
Subject length lower bound = 0
/(?>.*)(?<=(abcd)|(wxyz))/I
-Capture group count = 2
+Capturing subpattern count = 2
Max lookbehind = 4
May match empty string
Subject length lower bound = 0
"(?>.*)foo"I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'o'
Subject length lower bound = 3
"(?>.*?)foo"I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'o'
Subject length lower bound = 3
/(?>^abc)/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
First code unit at start or follows newline
Last code unit = 'c'
Subject length lower bound = 3
/(?>.*abc)/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Last code unit = 'c'
Subject length lower bound = 3
/(?:.*abc)/Im
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
First code unit at start or follows newline
Last code unit = 'c'
@@ -12068,8 +12057,8 @@ No match
0: ba
/(?|(?<n>f)|(?<n>b))/I,dupnames
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
n 1
Options: dupnames
Starting code units: b f
@@ -12090,9 +12079,9 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 3
+Capturing subpattern count = 3
Max back reference = 2
-Named capture groups:
+Named capturing subpatterns:
a 1
a 2
Options: dupnames
@@ -13135,7 +13124,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c d
Last code unit = 'd'
Subject length lower bound = 1
@@ -13148,7 +13137,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c
Last code unit = 'd'
Subject length lower bound = 2
@@ -13161,7 +13150,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c d
Last code unit = 'd'
Subject length lower bound = 1
@@ -13174,7 +13163,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c
Last code unit = 'd'
Subject length lower bound = 5
@@ -13187,7 +13176,7 @@ Subject length lower bound = 5
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: a b c d
Last code unit = 'd'
Subject length lower bound = 1
@@ -13265,40 +13254,40 @@ Failed: error 108 at offset 3: range out of order in character class
Failed: error 150 at offset 5: invalid range in character class
/(?<0abc>xx)/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/(?&1abc)xx(?<1abc>y)/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/(?<ab-cd>xx)/
-Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator)
/(?'0abc'xx)/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/(?P<0abc>xx)/
-Failed: error 144 at offset 4: subpattern name must start with a non-digit
+Failed: error 144 at offset 4: group name must start with a non-digit
/\k<5ghj>/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/\k'5ghj'/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/\k{2fgh}/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/(?P=8yuki)/
-Failed: error 144 at offset 4: subpattern name must start with a non-digit
+Failed: error 144 at offset 4: group name must start with a non-digit
/\g{4df}/
Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
/(?&1abc)xx(?<1abc>y)/
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
+Failed: error 144 at offset 3: group name must start with a non-digit
/(?P>1abc)xx(?<1abc>y)/
-Failed: error 144 at offset 4: subpattern name must start with a non-digit
+Failed: error 144 at offset 4: group name must start with a non-digit
/\g'3gh'/
Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
@@ -13307,16 +13296,16 @@ Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed,
Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
/(?(<4gh>)abc)/
-Failed: error 144 at offset 4: subpattern name must start with a non-digit
+Failed: error 144 at offset 4: group name must start with a non-digit
/(?('4gh')abc)/
-Failed: error 144 at offset 4: subpattern name must start with a non-digit
+Failed: error 144 at offset 4: group name must start with a non-digit
/(?(4gh)abc)/
Failed: error 124 at offset 4: missing closing parenthesis for condition
/(?(R&6yh)abc)/
-Failed: error 144 at offset 5: subpattern name must start with a non-digit
+Failed: error 144 at offset 5: group name must start with a non-digit
/(((a\2)|(a*)\g<-1>))*a?/B
------------------------------------------------------------------
@@ -13498,7 +13487,7 @@ No match
0+ dab
/(?(VERSION>=10.0)yes|no)/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 2
yesno
0: yes
@@ -13517,7 +13506,7 @@ Subject length lower bound = 2
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
yesno
@@ -13525,7 +13514,7 @@ Subject length lower bound = 0
0+ yesno
/(?(VERSION=8)yes|no){3}/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 6
yesnononoyes
0: nonono
@@ -13534,9 +13523,9 @@ Subject length lower bound = 6
No match
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
VERSION 1
Starting code units: a x
Subject length lower bound = 5
@@ -13564,18 +13553,18 @@ Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION con
Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition
/abcd/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'a'
Last code unit = 'd'
Subject length lower bound = 4
/abcd/I,no_start_optimize
-Capture group count = 0
+Capturing subpattern count = 0
Options: no_start_optimize
Subject length lower bound = 0
/(|ab)*?d/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a d
Last code unit = 'd'
Subject length lower bound = 1
@@ -13586,7 +13575,7 @@ Subject length lower bound = 1
0: d
/(|ab)*?d/I,no_start_optimize
-Capture group count = 1
+Capturing subpattern count = 1
Options: no_start_optimize
Subject length lower bound = 0
abd
@@ -13850,21 +13839,21 @@ Copy substring 1 failed (-2): partial match
get substring list failed (-2): partial match
/^abc/info
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3
/^abc/info,no_dotstar_anchor
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: no_dotstar_anchor
Overall options: anchored no_dotstar_anchor
First code unit = 'a'
Subject length lower bound = 3
/.*\d/info,auto_callout
-Capture group count = 0
+Capturing subpattern count = 0
Options: auto_callout
First code unit at start or follows newline
Subject length lower bound = 1
@@ -13879,7 +13868,7 @@ Subject length lower bound = 1
No match
/.*\d/info,no_dotstar_anchor,auto_callout
-Capture group count = 0
+Capturing subpattern count = 0
Options: auto_callout no_dotstar_anchor
Subject length lower bound = 1
\= Expect no match
@@ -13900,18 +13889,18 @@ Subject length lower bound = 1
No match
/.*\d/dotall,info
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: dotall
Overall options: anchored dotall
Subject length lower bound = 1
/.*\d/dotall,no_dotstar_anchor,info
-Capture group count = 0
+Capturing subpattern count = 0
Options: dotall no_dotstar_anchor
Subject length lower bound = 1
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: no_dotstar_anchor
Subject length lower bound = 1
@@ -14334,7 +14323,7 @@ Failed: error 125 at offset 1: lookbehind assertion is not fixed length
0+
/((?2){73}(?2))((?1))/info
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Subject length lower bound = 0
@@ -14394,8 +14383,8 @@ Failed: error 115 at offset 2: reference to non-existent subpattern
/(?x:((?'a')) # comment (with parentheses) and | vertical
(?-x:#not a comment (?'b')) # this is a comment ()
(?'c')) # not a comment (?'d')/info
-Capture group count = 5
-Named capture groups:
+Capturing subpattern count = 5
+Named capturing subpatterns:
a 2
b 3
c 4
@@ -14405,8 +14394,8 @@ Last code unit = ' '
Subject length lower bound = 32
/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames
-Capture group count = 3
-Named capture groups:
+Capturing subpattern count = 3
+Named capturing subpatterns:
a 1
a 2
b 3
@@ -14493,7 +14482,7 @@ Failed: error 162 at offset 4: subpattern name expected
"(?J:(?|(?'R')(\k'R')|((?'R'))))"
/(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/
-Failed: error 161 at offset 17: subpattern number is too big
+Failed: error 161 at offset 17: group number is too big
/^(?:(?(1)x|)+)+$()/B
------------------------------------------------------------------
@@ -14544,8 +14533,8 @@ Failed: error 162 at offset 10: subpattern name expected
Failed: error 162 at offset 14: subpattern name expected
/((?x)(?#))#(?'abc')/I
-Capture group count = 2
-Named capture groups:
+Capturing subpattern count = 2
+Named capturing subpatterns:
abc 2
First code unit = '#'
Subject length lower bound = 1
@@ -14554,8 +14543,8 @@ Subject length lower bound = 1
Failed: error 162 at offset 9: subpattern name expected
/[[:\\](?'abc')[a:]/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
abc 1
Starting code units: : [ \
Subject length lower bound = 2
@@ -14589,16 +14578,16 @@ Failed: error 158 at offset 3: (?R (recursive pattern call) must be followed by
Failed: error 162 at offset 16: subpattern name expected
/(?(?C{\Q})(?!(?'abc')))/I
-Capture group count = 1
-Named capture groups:
+Capturing subpattern count = 1
+Named capturing subpatterns:
abc 1
May match empty string
Subject length lower bound = 0
/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I
-Capture group count = 8
+Capturing subpattern count = 8
Max back reference = 8
-Named capture groups:
+Named capturing subpatterns:
R 7
R 8
Duplicate name status changes
@@ -14606,33 +14595,33 @@ Last code unit = '0'
Subject length lower bound = 65535
/(?|(aaa)|(b))\g{1}/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Starting code units: a b
Subject length lower bound = 0
/(?|(aaa)|(b))(?1)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Subject length lower bound = 4
/(?|(aaa)|(b))/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a b
Subject length lower bound = 1
/(?|(?'a'aaa)|(?'a'b))\k'a'/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
-Named capture groups:
+Named capturing subpatterns:
a 1
Starting code units: a b
Subject length lower bound = 0
/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/I,dupnames
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 2
-Named capture groups:
+Named capturing subpatterns:
a 1
a 2
Options: dupnames
@@ -14684,10 +14673,10 @@ No match
0: ab
/(?(8000000000/
-Failed: error 161 at offset 8: subpattern number is too big
+Failed: error 161 at offset 8: group number is too big
/((?(R8000000000)))/
-Failed: error 161 at offset 9: subpattern number is too big
+Failed: error 161 at offset 9: group number is too big
/0(?0)|(1)(*THEN)(*SKIP:0)(*FAIL)/
\= Expect no match
@@ -14695,7 +14684,7 @@ Failed: error 161 at offset 9: subpattern number is too big
No match
/(?(1)()\983040\2)/
-Failed: error 161 at offset 14: subpattern number is too big
+Failed: error 161 at offset 14: group number is too big
/(*LIMIT_MATCH=)abc/
Failed: error 160 at offset 14: (*VERB) not recognized or malformed
@@ -14894,7 +14883,7 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
1: AAbbaa..AAbBaa
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 1
Compile options: <none>
Overall options: anchored
@@ -14903,10 +14892,10 @@ Last code unit = '}'
Subject length lower bound = 65535
/((p(?'K/
-Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
/((p(?'K/no_auto_capture
-Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?)
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
/abc/replace=A$3123456789Z
abc
@@ -14916,7 +14905,7 @@ Failed: error -49 at offset 3 in replacement: unknown substring
Failed: error 187 at offset 0: lookbehind assertion is too long
/(?<!a{65535})x/I
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 65535
First code unit = 'x'
Subject length lower bound = 1
@@ -14930,7 +14919,7 @@ Failed: error -60: match with end before start or start moved backwards is not s
Failed: error -60: match with end before start or start moved backwards is not supported
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
-Failed: error 148 at offset 36: subpattern name is too long (maximum 32 code units)
+Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
@@ -14952,7 +14941,7 @@ Failed: error 188 at offset 0: pattern string is longer than the limit set by th
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))"xI
-Capture group count = 12
+Capturing subpattern count = 12
May match empty string
Options: extended
First code unit at start or follows newline
@@ -14963,7 +14952,7 @@ Subject length lower bound = 0
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
a)"xI
-Capture group count = 12
+Capturing subpattern count = 12
Max lookbehind = 2
May match empty string
Options: extended
@@ -14973,7 +14962,7 @@ Subject length lower bound = 0
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))"xI
-Capture group count = 13
+Capturing subpattern count = 13
May match empty string
Options: extended
Subject length lower bound = 0
@@ -14990,14 +14979,14 @@ Failed: error 135 at offset 9: lookbehind is too complicated
/\[()]{1024}/I,expand
Expanded: ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
-Capture group count = 1024
+Capturing subpattern count = 1024
May match empty string
Subject length lower bound = 0
# Test minlength capped at 65535
/(A{65000})\1{65000}/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'A'
Last code unit = 'A'
@@ -15019,7 +15008,7 @@ Subject length lower bound = 65535
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
@@ -15037,7 +15026,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
@@ -15055,7 +15044,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
@@ -15073,7 +15062,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
@@ -15521,7 +15510,7 @@ Failed: error 150 at offset 5: invalid range in character class
------------------------------------------------------------------
/(?=.*[A-Z])/I
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Subject length lower bound = 0
@@ -15603,7 +15592,7 @@ Failed: error -33: bad offset value
# complicated to find a minimum matching length.
"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
-Capture group count = 108
+Capturing subpattern count = 108
Max back reference = 22
Contains explicit CR or LF match
Subject length lower bound = 1
@@ -15637,29 +15626,29 @@ Failed: error 128 at offset 11: assertion expected after (?( or (?(?C)
Failed: error -37: callout error code
/()(\g+65534)/
-Failed: error 161 at offset 11: subpattern number is too big
+Failed: error 161 at offset 11: group number is too big
/()(\g+65533)/
Failed: error 115 at offset 10: reference to non-existent subpattern
/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 2
First code unit = \xc1
Last code unit = '2'
Subject length lower bound = 65535
/(a)(b)\2\1\1\1\1/I
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 2
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 7
/(?<a>a)(?<b>b)\g{b}\g{a}\g{a}\g{a}\g{a}(?<a>xx)(?<b>zz)/I,dupnames
-Capture group count = 4
+Capturing subpattern count = 4
Max back reference = 4
-Named capture groups:
+Named capturing subpatterns:
a 1
a 3
b 2
@@ -15827,14 +15816,14 @@ No match
# the start of a branch.
/(?(DEFINE)(a))^bc/I
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
First code unit = 'b'
Subject length lower bound = 2
/(a){0}.*bc/sI
-Capture group count = 1
+Capturing subpattern count = 1
Compile options: dotall
Overall options: anchored dotall
Last code unit = 'c'
@@ -15844,7 +15833,7 @@ Subject length lower bound = 2
# no alternative branch.
/(?(VERSION>=999)yes)^bc/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Subject length lower bound = 2
@@ -15852,12 +15841,12 @@ Subject length lower bound = 2
# This should not be anchored.
/(?(VERSION>=999)yes|no)^bc/I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'c'
Subject length lower bound = 4
/(*LIMIT_HEAP=0)xxx/I
-Capture group count = 0
+Capturing subpattern count = 0
Heap limit = 0
First code unit = 'x'
Last code unit = 'x'
@@ -16133,7 +16122,7 @@ Failed: error -37: callout error code
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended_more
Starting code units: a b c
Subject length lower bound = 1
@@ -16145,7 +16134,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended extended_more
Starting code units: a b c
Subject length lower bound = 1
@@ -16229,40 +16218,22 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Extra options: bad_escape_is_literal
First code unit = 'N'
Last code unit = 'c'
Subject length lower bound = 3
-/[\j\x{z}\o\gAb\g]/B,bad_escape_is_literal
+/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
------------------------------------------------------------------
Bra
- [Abgjoxz{}]
+ [A-Nb-gjoxz{}]
Ket
End
------------------------------------------------------------------
/[Q-\N]/B,bad_escape_is_literal
-Failed: error 150 at offset 5: invalid range in character class
-
-/[\s-_]/bad_escape_is_literal
-Failed: error 150 at offset 3: invalid range in character class
-
-/[_-\s]/bad_escape_is_literal
-Failed: error 150 at offset 5: invalid range in character class
-
-/[\B\R\X]/B
-Failed: error 107 at offset 2: escape sequence is invalid in character class
-
-/[\B\R\X]/B,bad_escape_is_literal
-Failed: error 107 at offset 2: escape sequence is invalid in character class
-
-/[A-\BP-\RV-\X]/B
-Failed: error 107 at offset 4: escape sequence is invalid in character class
-
-/[A-\BP-\RV-\X]/B,bad_escape_is_literal
-Failed: error 107 at offset 4: escape sequence is invalid in character class
+Failed: error 108 at offset 4: range out of order in character class
# ----------------------------------------------------------------------
@@ -16335,7 +16306,7 @@ Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL
0: (*CR)abc
/cat|dog/I,match_word
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 1
Extra options: match_word
Starting code units: c d
@@ -16351,7 +16322,7 @@ No match
No match
/(cat)|dog/I,match_line,literal
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: literal
Overall options: anchored literal
Extra options: match_line
@@ -16419,7 +16390,7 @@ Failed: error 150 at offset 3: invalid range in character class
# Perl gets the second of these wrong, giving no match.
"(?<=(a))\1?b"I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Max lookbehind = 1
Last code unit = 'b'
@@ -16432,7 +16403,7 @@ Subject length lower bound = 1
1: a
"(?=(a))\1?b"I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'a'
Last code unit = 'b'
@@ -16661,21 +16632,21 @@ Failed: error 194 at offset 3: invalid hyphen in option setting
Failed: error 194 at offset 5: invalid hyphen in option setting
/(?(?=^))b/I
-Capture group count = 0
+Capturing subpattern count = 0
Last code unit = 'b'
Subject length lower bound = 1
abc
0: b
/(?(?=^)|)b/I
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = 'b'
Subject length lower bound = 1
abc
0: b
/(?(?=^)|^)b/I
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
First code unit = 'b'
@@ -16687,7 +16658,7 @@ Subject length lower bound = 1
No match
/(?(1)^|^())/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Compile options: <none>
@@ -16695,13 +16666,13 @@ Overall options: anchored
Subject length lower bound = 0
/(?(1)^())b/I
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Last code unit = 'b'
Subject length lower bound = 1
/(?(1)^())+b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Last code unit = 'b'
Subject length lower bound = 1
@@ -16710,7 +16681,7 @@ Subject length lower bound = 1
0+ c
/(?(1)^()|^)+b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Compile options: <none>
Overall options: anchored
@@ -16724,7 +16695,7 @@ Subject length lower bound = 1
No match
/(?(1)^()|^)*b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
First code unit = 'b'
Subject length lower bound = 1
@@ -16739,7 +16710,7 @@ Subject length lower bound = 1
0+ c
/(?(1)^())+b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Last code unit = 'b'
Subject length lower bound = 1
@@ -16748,7 +16719,7 @@ Subject length lower bound = 1
0+ c
/(?(1)^a()|^a)+b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Compile options: <none>
Overall options: anchored
@@ -16763,7 +16734,7 @@ Subject length lower bound = 2
No match
/(?(1)^|^(a))+b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Compile options: <none>
Overall options: anchored
@@ -16778,7 +16749,7 @@ Subject length lower bound = 1
No match
/(?(1)^a()|^a)*b/I,aftertext
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
Last code unit = 'b'
Subject length lower bound = 1
@@ -16792,148 +16763,6 @@ Subject length lower bound = 1
0: b
0+ c
-/a(b)c|xyz/g,allvector,replace=<$0>
- abcdefabcpqr\=ovector=4
- 2: <abc>def<abc>pqr
- 0: 6 9
- 1: 7 8
- 2: <unchanged>
- 3: <unchanged>
- abxyz\=ovector=4
- 1: ab<xyz>
- 0: 2 5
- 1: <unset>
- 2: <unchanged>
- 3: <unchanged>
- abcdefxyz\=ovector=4
- 2: <abc>def<xyz>
- 0: 6 9
- 1: <unset>
- 2: <unchanged>
- 3: <unchanged>
-
-/a(b)c|xyz/allvector
- abcdef\=ovector=4
- 0: abc
- 1: b
- 2: <unchanged>
- 3: <unchanged>
- abxyz\=ovector=4
- 0: xyz
- 1: <unset>
- 2: <unchanged>
- 3: <unchanged>
-
-/a(b)c|xyz/g,replace=<$0>,substitute_callout
- abcdefabcpqr
- 1(2) Old 0 3 "abc" New 0 5 "<abc>"
- 2(2) Old 6 9 "abc" New 8 13 "<abc>"
- 2: <abc>def<abc>pqr
- abxyzpqrabcxyz
- 1(1) Old 2 5 "xyz" New 2 7 "<xyz>"
- 2(2) Old 8 11 "abc" New 10 15 "<abc>"
- 3(1) Old 11 14 "xyz" New 15 20 "<xyz>"
- 3: ab<xyz>pqr<abc><xyz>
- 12abc34xyz99abc55\=substitute_stop=2
- 1(2) Old 2 5 "abc" New 2 7 "<abc>"
- 2(1) Old 7 10 "xyz" New 9 14 "<xyz> STOPPED"
- 2: 12<abc>34xyz99abc55
- 12abc34xyz99abc55\=substitute_skip=1
- 1(2) Old 2 5 "abc" New 2 7 "<abc> SKIPPED"
- 2(1) Old 7 10 "xyz" New 7 12 "<xyz>"
- 3(2) Old 12 15 "abc" New 14 19 "<abc>"
- 3: 12abc34<xyz>99<abc>55
- 12abc34xyz99abc55\=substitute_skip=2
- 1(2) Old 2 5 "abc" New 2 7 "<abc>"
- 2(1) Old 7 10 "xyz" New 9 14 "<xyz> SKIPPED"
- 3(2) Old 12 15 "abc" New 14 19 "<abc>"
- 3: 12<abc>34xyz99<abc>55
-
-/a(b)c|xyz/g,replace=<$0>
- abcdefabcpqr
- 2: <abc>def<abc>pqr
- abxyzpqrabcxyz
- 3: ab<xyz>pqr<abc><xyz>
- 12abc34xyz\=substitute_stop=2
- 1(2) Old 2 5 "abc" New 2 7 "<abc>"
- 2(1) Old 7 10 "xyz" New 9 14 "<xyz> STOPPED"
- 2: 12<abc>34xyz
- 12abc34xyz\=substitute_skip=1
- 1(2) Old 2 5 "abc" New 2 7 "<abc> SKIPPED"
- 2(1) Old 7 10 "xyz" New 7 12 "<xyz>"
- 2: 12abc34<xyz>
-
-/a(b)c|xyz/replace=<$0>
- abcdefabcpqr
- 1: <abc>defabcpqr
- 12abc34xyz\=substitute_skip=1
- 1(2) Old 2 5 "abc" New 2 7 "<abc> SKIPPED"
- 1: 12abc34xyz
- 12abc34xyz\=substitute_stop=1
- 1(2) Old 2 5 "abc" New 2 7 "<abc> STOPPED"
- 1: 12abc34xyz
-
-/abc\rdef/
- abc\ndef
-No match
-
-/abc\rdef\x{0d}xyz/escaped_cr_is_lf
- abc\ndef\rxyz
- 0: abc\x0adef\x0dxyz
-\= Expect no match
- abc\ndef\nxyz
-No match
-
-/(?(*ACCEPT)xxx)/
-Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
-
-/(?(*atomic:xx)xxx)/
-Failed: error 128 at offset 10: assertion expected after (?( or (?(?C)
-
-/(?(*script_run:xxx)zzz)/
-Failed: error 128 at offset 14: assertion expected after (?( or (?(?C)
-
-/foobar/
- the foobar thing\=copy_matched_subject
- 0: foobar
- the foobar thing\=copy_matched_subject,zero_terminate
- 0: foobar
-
-/foobar/g
- the foobar thing foobar again\=copy_matched_subject
- 0: foobar
- 0: foobar
-
-/(*:XX)^abc/I
-Capture group count = 0
-Compile options: <none>
-Overall options: anchored
-First code unit = 'a'
-Subject length lower bound = 3
-
-/(*COMMIT:XX)^abc/I
-Capture group count = 0
-Compile options: <none>
-Overall options: anchored
-First code unit = 'a'
-Subject length lower bound = 3
-
-/(*ACCEPT:XX)^abc/I
-Capture group count = 0
-Subject length lower bound = 0
-
-/abc/replace=xyz
- abc\=null_context
- 1: xyz
-
-/abc/replace=xyz,substitute_callout
- abc
- 1(1) Old 0 3 "abc" New 0 3 "xyz"
- 1: xyz
-\= Expect error message
- abc\=null_context
-** Replacement callouts are not supported with null_context.
-
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
diff --git a/dist2/testdata/testoutput20 b/dist2/testdata/testoutput20
index 5ce720fa..d6265fdd 100644
--- a/dist2/testdata/testoutput20
+++ b/dist2/testdata/testoutput20
@@ -32,9 +32,9 @@
#load testsaved2
#pop info
-Capture group count = 2
+Capturing subpattern count = 2
Max back reference = 2
-Named capture groups:
+Named capturing subpatterns:
n 1
n 2
Options: dupnames
@@ -66,8 +66,8 @@ No match, mark = A
4: A
#pop info
-Capture group count = 4
-Named capture groups:
+Capturing subpattern count = 4
+Named capturing subpatterns:
ADDR 2
ADDRESS_PAT 4
NAME 1
diff --git a/dist2/testdata/testoutput21 b/dist2/testdata/testoutput21
index fbd74004..cba13261 100644
--- a/dist2/testdata/testoutput21
+++ b/dist2/testdata/testoutput21
@@ -79,7 +79,7 @@
Failed: error 183 at offset 4: using \C is disabled by the application
/ab\Cde/info
-Capture group count = 0
+Capturing subpattern count = 0
Contains \C
First code unit = 'a'
Last code unit = 'e'
diff --git a/dist2/testdata/testoutput22-16 b/dist2/testdata/testoutput22-16
index 4b6008ce..df29e14d 100644
--- a/dist2/testdata/testoutput22-16
+++ b/dist2/testdata/testoutput22-16
@@ -4,7 +4,7 @@
# in some widths and not in others.
/ab\Cde/utf,info
-Capture group count = 0
+Capturing subpattern count = 0
Contains \C
Options: utf
First code unit = 'a'
diff --git a/dist2/testdata/testoutput22-32 b/dist2/testdata/testoutput22-32
index e96696a9..f0b7984a 100644
--- a/dist2/testdata/testoutput22-32
+++ b/dist2/testdata/testoutput22-32
@@ -4,7 +4,7 @@
# in some widths and not in others.
/ab\Cde/utf,info
-Capture group count = 0
+Capturing subpattern count = 0
Contains \C
Options: utf
First code unit = 'a'
diff --git a/dist2/testdata/testoutput22-8 b/dist2/testdata/testoutput22-8
index 5dd167ec..0a04aa81 100644
--- a/dist2/testdata/testoutput22-8
+++ b/dist2/testdata/testoutput22-8
@@ -4,7 +4,7 @@
# in some widths and not in others.
/ab\Cde/utf,info
-Capture group count = 0
+Capturing subpattern count = 0
Contains \C
Options: utf
First code unit = 'a'
diff --git a/dist2/testdata/testoutput3 b/dist2/testdata/testoutput3
index 801966a9..d9e8c5c1 100644
--- a/dist2/testdata/testoutput3
+++ b/dist2/testdata/testoutput3
@@ -78,13 +78,13 @@ No match
0: école
/\w/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
/\w/I,locale=fr_FR
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â
@@ -153,7 +153,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç
È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í
diff --git a/dist2/testdata/testoutput3A b/dist2/testdata/testoutput3A
index d7a223ab..9b00be8b 100644
--- a/dist2/testdata/testoutput3A
+++ b/dist2/testdata/testoutput3A
@@ -78,13 +78,13 @@ No match
0: école
/\w/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
/\w/I,locale=fr_FR
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â
@@ -153,7 +153,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç
È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í
diff --git a/dist2/testdata/testoutput3B b/dist2/testdata/testoutput3B
index b18d441b..b30fc5f1 100644
--- a/dist2/testdata/testoutput3B
+++ b/dist2/testdata/testoutput3B
@@ -78,13 +78,13 @@ No match
0: école
/\w/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
/\w/I,locale=fr_FR
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â
@@ -153,7 +153,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç
È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í
diff --git a/dist2/testdata/testoutput4 b/dist2/testdata/testoutput4
index 84b8b9ee..ba3df376 100644
--- a/dist2/testdata/testoutput4
+++ b/dist2/testdata/testoutput4
@@ -3741,275 +3741,5 @@ No match
/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf
\x{99}\x{99}\x{99}
0: \x{99}\x{99}\x{99}
-
-# Script run tests
-
-/^(*script_run:.{4})/utf
- abcd Latin x4
- 0: abcd
- \x{2e80}\x{2fa1d}\x{3041}\x{30a1} Han Han Hiragana Katakana
- 0: \x{2e80}\x{2fa1d}\x{3041}\x{30a1}
- \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
- 0: \x{3041}\x{30a1}\x{3007}\x{3007}
- \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
- 0: \x{30a1}\x{3041}\x{3007}\x{3007}
- \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
- 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
- \x{2e80}\x{3105}\x{2e80}\x{3105} Han Bopomofo Han Bopomofo
- 0: \x{2e80}\x{3105}\x{2e80}\x{3105}
- \x{02ea}\x{2e80}\x{2e80}\x{3105} Bopomofo-Sk Han Han Bopomofo
- 0: \x{2ea}\x{2e80}\x{2e80}\x{3105}
- \x{3105}\x{2e80}\x{2e80}\x{3105} Bopomofo Han Han Bopomofo
- 0: \x{3105}\x{2e80}\x{2e80}\x{3105}
- \x{0300}cd! Inherited Latin Latin Common
- 0: \x{300}cd!
- \x{0391}12\x{03a9} Greek Common-digits Greek
- 0: \x{391}12\x{3a9}
- \x{0400}12\x{fe2f} Cyrillic Common-digits Cyrillic
- 0: \x{400}12\x{fe2f}
- \x{0531}12\x{fb17} Armenian Common-digits Armenian
- 0: \x{531}12\x{fb17}
- \x{0591}12\x{fb4f} Hebrew Common-digits Hebrew
- 0: \x{591}12\x{fb4f}
- \x{0600}12\x{1eef1} Arabic Common-digits Arabic
- 0: \x{600}12\x{1eef1}
- \x{0600}\x{0660}\x{0669}\x{1eef1} Arabic Arabic-digits Arabic
- 0: \x{600}\x{660}\x{669}\x{1eef1}
- \x{0700}12\x{086a} Syriac Common-digits Syriac
- 0: \x{700}12\x{86a}
- \x{1200}12\x{ab2e} Ethiopic Common-digits Ethiopic
- 0: \x{1200}12\x{ab2e}
- \x{1680}12\x{169c} Ogham Common-digits Ogham
- 0: \x{1680}12\x{169c}
- \x{3041}12\x{3041} Hiragana Common-digits Hiragana
- 0: \x{3041}12\x{3041}
- \x{0980}\x{09e6}\x{09e7}\x{0993} Bengali Bengali-digits Bengali
- 0: \x{980}\x{9e6}\x{9e7}\x{993}
- !cde Common Latin Latin Latin
- 0: !cde
- A..B Latin Common Common Latin
- 0: A..B
- 0abc Ascii-digit Latin Latin Latin
- 0: 0abc
- 1\x{0700}\x{0700}\x{0700} Ascii-digit Syriac x 3
- 0: 1\x{700}\x{700}\x{700}
- \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters
- 0: \x{1a80}\x{1a80}\x{1a40}\x{1a41}
-\= Expect no match
- a\x{370}bcd Latin Greek Latin Latin
-No match
- \x{1100}\x{02ea}\x{02ea}\x{02ea} Hangul Bopomofo x3
-No match
- \x{02ea}\x{02ea}\x{02ea}\x{1100} Bopomofo x3 Hangul
-No match
- \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
-No match
- \x{0391}\x{09e6}\x{09e7}\x{03a9} Greek Bengali digits Greek
-No match
- \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic
-No match
- \x{0600}\x{0669}7\x{1eef1} Arabic Arabic-digit ascii-digit Arabic
-No match
- A5\x{ff19}B Latin Common-ascii/notascii-digits Latin
-No match
- \x{0300}cd\x{0391} Inherited Latin Latin Greek
-No match
- !cd\x{0391} Common Latin Latin Greek
-No match
- \x{1A80}\x{1A90}\x{1a40}\x{1a41} Tai Tham Hora digit, Tham digit, letters
-No match
- A\x{1d7ce}\x{1d7ff}B Common fancy-common-2-sets-digits Common
-No match
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-No match
-
-/^(*sr:.{4}|..)/utf
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- 0: \x{2e80}\x{3105}
-
-/^(*atomic_script_run:.{4}|..)/utf
-\= Expect no match
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-No match
-
-/^(*asr:.*)/utf
-\= Expect no match
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
-No match
-
-/^(?>(*sr:.*))/utf
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- 0: \x{2e80}\x{3105}\x{2e80}
-
-/^(*sr:.*)/utf
- \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
- 0: \x{2e80}\x{3105}\x{2e80}
- \x{10fffd}\x{10fffd}\x{10fffd} Private use (Unknown)
- 0: \x{10fffd}
-
-/^(*sr:\x{2e80}*)/utf
- \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo
- 0: \x{2e80}\x{2e80}
-
-/^(*sr:\x{2e80}*)\x{2e80}/utf
- \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo
- 0: \x{2e80}\x{2e80}
-
-/^(*sr:.*)Test/utf
- Test script run on an empty string
- 0: Test
-
-/^(*sr:(.{2})){2}/utf
- \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic
- 0: \x{600}7\x{669}\x{1eef1}
- 1: \x{669}\x{1eef1}
- \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters
- 0: \x{1a80}\x{1a80}\x{1a40}\x{1a41}
- 1: \x{1a40}\x{1a41}
- \x{1A80}\x{1a40}\x{1A90}\x{1a41} Tai Tham Hora digit, letter, Tham digit, letter
- 0: \x{1a80}\x{1a40}\x{1a90}\x{1a41}
- 1: \x{1a90}\x{1a41}
-\= Expect no match
- \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
-No match
-
-/^(*sr:\S*)/utf
- \x{1cf4}\x{20f0}\x{900}\x{11305} [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Gran
- 0: \x{1cf4}\x{20f0}\x{900}
- \x{1cf4}\x{20f0}\x{11305}\x{900} [Dev,Gran,Kan] [Dev,Gran,Lat] Gran Dev
- 0: \x{1cf4}\x{20f0}\x{11305}
- \x{1cf4}\x{20f0}\x{900}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Lat
- 0: \x{1cf4}\x{20f0}\x{900}
- \x{1cf4}\x{20f0}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Lat
- 0: \x{1cf4}\x{20f0}
- \x{20f0}ABC [Dev,Gran,Lat] Lat
- 0: \x{20f0}ABC
- XYZ\x{20f0}ABC Lat [Dev,Gran,Lat] Lat
- 0: XYZ\x{20f0}ABC
- \x{a36}\x{a33}\x{900} [Dev,...] [Dev,...] Dev
- 0: \x{a36}\x{a33}
- \x{3001}\x{2e80}\x{3041}\x{30a1} [Bopo, Han, etc] Han Hira Kata
- 0: \x{3001}\x{2e80}\x{3041}\x{30a1}
- \x{3001}\x{30a1}\x{2e80}\x{3041} [Bopo, Han, etc] Kata Han Hira
- 0: \x{3001}\x{30a1}\x{2e80}\x{3041}
- \x{3001}\x{3105}\x{2e80}\x{1101} [Bopo, Han, etc] Bopomofo Han Hangul
- 0: \x{3001}\x{3105}\x{2e80}
- \x{3105}\x{3001}\x{2e80}\x{1101} Bopomofo [Bopo, Han, etc] Han Hangul
- 0: \x{3105}\x{3001}\x{2e80}
- \x{3031}\x{3041}\x{30a1}\x{2e80} [Hira Kata] Hira Kata Han
- 0: \x{3031}\x{3041}\x{30a1}\x{2e80}
- \x{060c}\x{06d4}\x{0600}\x{10d00}\x{0700} [Arab Rohg Syrc Thaa] [Arab Rohg] Arab Rohg Syrc
- 0: \x{60c}\x{6d4}\x{600}
- \x{060c}\x{06d4}\x{0700}\x{0600}\x{10d00} [Arab Rohg Syrc Thaa] [Arab Rohg] Syrc Arab Rohg
- 0: \x{60c}\x{6d4}
- \x{2e80}\x{3041}\x{3001}\x{3031}\x{2e80} Han Hira [Bopo, Han, etc] [Hira Kata] Han
- 0: \x{2e80}\x{3041}\x{3001}\x{3031}\x{2e80}
-
-/(?<!)(*sr:)/
-
-/(?<!X(*sr:B)C)/
-
-/(?<=abc(?=X(*sr:BCY)Z)XBCYZ)./
- abcXBCYZ!
- 0: !
-
-/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./
- abcXBXYCCC!
- 0: !
-
-/^(*sr:\S*)/utf
- \x{10d00}\x{10d00}\x{06d4} Rohingya Rohingya Arabic-full-stop
- 0: \x{10d00}\x{10d00}\x{6d4}
- \x{06d4}\x{10d00}\x{10d00} Arabic-full-stop Rohingya Rohingya
- 0: \x{6d4}\x{10d00}\x{10d00}
- \x{10d00}\x{10d00}\x{0363} Rohingya Rohingya Inherited-extend-Latin
- 0: \x{10d00}\x{10d00}
- \x{0363}\x{10d00}\x{10d00} Inherited-extend-Latin Rohingya Rohingya
- 0: \x{363}
- AB\x{0363} Latin Latin Inherited-extend-Latin
- 0: AB\x{363}
- \x{0363}AB Inherited-extend-Latin Latin Latin
- 0: \x{363}AB
- AB\x{1cf7} Latin Latin Common-extended-Beng
- 0: AB
- \x{1cf7}AB Common-extend-Beng Latin Latin
- 0: \x{1cf7}
- \x{1cf7}\x{0993} Common-extend-Beng Bengali
- 0: \x{1cf7}\x{993}
- A\x{1abe}BC Test enclosing mark
- 0: A\x{1abe}BC
- \x{0370}\x{1abe}\x{0371} Which can occur with any script (Greek here)
- 0: \x{370}\x{1abe}\x{371}
- \x{3001}\x{adf9}\x{3001} [.. Hangul ..] Hangul [.. Hangul ..]
- 0: \x{3001}\x{adf9}\x{3001}
- \x{3400}\x{3001}XXX Han [Han etc.]
- 0: \x{3400}\x{3001}
- \x{3400}\x{1cd5} Han [Bengali Devanagari]
- 0: \x{3400}
- \x{ac01}\x{3400} Hangul [.. Hangul ..]
- 0: \x{ac01}\x{3400}
- \x{ac01}\x{1cd5} Hangul [Bengali Devanagari]
- 0: \x{ac01}
- \x{102e0}\x{06d4}\x{1ee4d} [Arabic Coptic] [Arab Rohingya] Arabic
- 0: \x{102e0}\x{6d4}\x{1ee4d}
- \x{102e0}\x{06d4}\x{2cc9} [Arabic Coptic] [Arab Rohingya] Coptic
- 0: \x{102e0}\x{6d4}
- \x{102e0}\x{06d4}\x{10d30} [Arabic Coptic] [Arab Rohingya] Rohingya
- 0: \x{102e0}\x{6d4}
-
-# Test loop breaking for empty string match
-
-/^(*sr:A|)*BCD/utf
- AABCD
- 0: AABCD
- ABCD
- 0: ABCD
- BCD
- 0: BCD
-
-# The use of (*ACCEPT) breaks script run checking
-
-/^(*sr:.*(*ACCEPT)ZZ)/utf
- \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
- 0: \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul
-
-# -------
-
-# Test group names containing non-ASCII letters and digits
-
-/(?'ABáC'...)\g{ABáC}/utf
- abcabcdefg
- 0: abcabc
- 1: abc
-
-/(?'XÊ°ABC'...)/utf
- xyzpq
- 0: xyz
- 1: xyz
-
-/(?'X×ABC'...)/utf
- 12345
- 0: 123
- 1: 123
-
-/(?'XᾈABC'...)/utf
- %^&*(...
- 0: %^&
- 1: %^&
-
-/(?'ð¨ABC'...)/utf
- abcde
- 0: abc
- 1: abc
-
-/^(?'×ABC'...)(?&×ABC)(?P=×ABC)/utf
- 123123123456
- 0: 123123123
- 1: 123
-
-/^(?'×ABC'...)(?&×ABC)/utf
- 123123123456
- 0: 123123
- 1: 123
# End of testinput4
diff --git a/dist2/testdata/testoutput5 b/dist2/testdata/testoutput5
index 5d64d00c..51caa181 100644
--- a/dist2/testdata/testoutput5
+++ b/dist2/testdata/testoutput5
@@ -147,7 +147,7 @@ Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0x
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -164,7 +164,7 @@ Subject length lower bound = 4
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Last code unit = 'X'
Subject length lower bound = 4
@@ -179,7 +179,7 @@ Subject length lower bound = 4
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 3
\x{212ab}\x{212ab}\x{212ab}\x{861}
@@ -193,7 +193,7 @@ Subject length lower bound = 3
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: a b
@@ -238,7 +238,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: utf
Subject length lower bound = 0
@@ -251,7 +251,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Subject length lower bound = 1
@@ -264,7 +264,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'a'
Last code unit = 'b'
@@ -291,7 +291,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
First code unit = \xff
Subject length lower bound = 1
>\xff<
@@ -304,7 +304,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[Ä-Ü]/utf
@@ -343,7 +343,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Options: utf
Last code unit = 'z'
Subject length lower bound = 7
@@ -363,7 +363,7 @@ Subject length lower bound = 7
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Options: utf
Subject length lower bound = 0
@@ -394,7 +394,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Options: utf
Subject length lower bound = 0
@@ -414,7 +414,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Options: utf
Subject length lower bound = 0
@@ -445,7 +445,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 2
+Capturing subpattern count = 2
May match empty string
Options: utf
Subject length lower bound = 0
@@ -471,7 +471,7 @@ Subject length lower bound = 0
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Compile options: no_start_optimize utf
Overall options: anchored no_start_optimize utf
Subject length lower bound = 0
@@ -713,7 +713,7 @@ No match
0: \x{1ec5}
/a\Rb/I,bsr=anycrlf,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches CR, LF, or CRLF
First code unit = 'a'
@@ -732,7 +732,7 @@ No match
No match
/a\Rb/I,bsr=unicode,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches any Unicode newline
First code unit = 'a'
@@ -750,7 +750,7 @@ Subject length lower bound = 3
0: a\x{0b}b
/a\R?b/I,bsr=anycrlf,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches CR, LF, or CRLF
First code unit = 'a'
@@ -769,7 +769,7 @@ No match
No match
/a\R?b/I,bsr=unicode,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches any Unicode newline
First code unit = 'a'
@@ -798,7 +798,7 @@ No match
/[[:a\x{100}b:]]/utf
Failed: error 130 at offset 3: unknown POSIX class name
-/a[^]b/utf,allow_empty_class,match_unset_backref
+/a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref
a\x{1234}b
0: a\x{1234}b
a\nb
@@ -807,7 +807,7 @@ Failed: error 130 at offset 3: unknown POSIX class name
ab
No match
-/a[^]+b/utf,allow_empty_class,match_unset_backref
+/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref
aXb
0: aXb
a\nX\nX\x{1234}b
@@ -1408,22 +1408,22 @@ Failed: error 168 at offset 3: \c must be followed by a printable ASCII characte
2: \x{0d}
/[^\x{1234}]+/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Subject length lower bound = 1
/[^\x{1234}]+?/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Subject length lower bound = 1
/[^\x{1234}]++/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Subject length lower bound = 1
/[^\x{1234}]{2}/Ii,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
Subject length lower bound = 2
@@ -1703,7 +1703,7 @@ Partial match: \x{0d}\x{0d}
------------------------------------------------------------------
/(?<=\x{1234}\x{1234})\bxy/I,utf
-Capture group count = 0
+Capturing subpattern count = 0
Max lookbehind = 2
Options: utf
First code unit = 'x'
@@ -1734,14 +1734,6 @@ No match
/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
-/^\u{0000000000010ffff}/utf,extra_alt_bsux
- \x{10ffff}
- 0: \x{10ffff}
-
-/\u/utf,alt_bsux
- \\u
- 0: u
-
/^a+[a\x{200}]/B,utf
------------------------------------------------------------------
Bra
@@ -1776,7 +1768,7 @@ Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0x
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[\p{^L}]/IB
@@ -1786,7 +1778,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[\P{L}]/IB
@@ -1796,7 +1788,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[\P{^L}]/IB
@@ -1806,7 +1798,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/[abc\p{L}\x{0660}]/IB,utf
@@ -1816,7 +1808,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
@@ -1827,7 +1819,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
1234
@@ -1840,7 +1832,7 @@ Subject length lower bound = 1
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
Subject length lower bound = 1
1234
@@ -3006,7 +2998,7 @@ Partial match: AA
Ket
End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless utf
First code unit = 'A' (caseless)
Last code unit = 'B' (caseless)
@@ -3294,27 +3286,27 @@ No match
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
------------------------------------------------------------------
Bra
- AllAny+
- AllAny
- AllAny+
+ prop Any +
+ prop Any
+ prop Any +
notprop Any
- AllAny+
+ prop Any +
prop L&
- AllAny+
+ prop Any +
prop L
- AllAny+
+ prop Any +
prop Lu
- AllAny+
+ prop Any +
prop Han
- AllAny+
+ prop Any +
prop Xan
- AllAny+
+ prop Any +
prop Xsp
- AllAny+
+ prop Any +
prop Xps
prop Xwd +
- AllAny
- AllAny+
+ prop Any
+ prop Any +
prop Xuc
Ket
End
@@ -3324,7 +3316,7 @@ No match
------------------------------------------------------------------
Bra
prop L& +
- AllAny
+ prop Any
prop L& +
prop L&
notprop L& ++
@@ -3355,7 +3347,7 @@ No match
------------------------------------------------------------------
Bra
prop N +
- AllAny
+ prop Any
prop N +
prop L&
prop N ++
@@ -3386,7 +3378,7 @@ No match
------------------------------------------------------------------
Bra
prop Lu +
- AllAny
+ prop Any
prop Lu +
prop L&
prop Lu +
@@ -3448,7 +3440,7 @@ No match
------------------------------------------------------------------
Bra
prop Xan +
- AllAny
+ prop Any
prop Xan +
prop L&
notprop Xan ++
@@ -3479,7 +3471,7 @@ No match
------------------------------------------------------------------
Bra
prop Xsp +
- AllAny
+ prop Any
prop Xsp ++
prop L&
prop Xsp ++
@@ -3508,7 +3500,7 @@ No match
------------------------------------------------------------------
Bra
prop Xwd +
- AllAny
+ prop Any
prop Xwd +
prop L&
prop Xwd +
@@ -3537,7 +3529,7 @@ No match
------------------------------------------------------------------
Bra
prop Xuc +
- AllAny
+ prop Any
prop Xuc +
prop L&
prop Xuc +
@@ -3922,7 +3914,7 @@ No match
------------------------------------------------------------------
/^s?c/Iim,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: caseless multiline utf
First code unit at start or follows newline
Last code unit = 'c' (caseless)
@@ -4783,154 +4775,5 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U,
abc
0: abc
MK: ABC
-
-# Script run tests: auto-possessification
-
-/^(*sr:.*)/B,utf
-------------------------------------------------------------------
- Bra
- ^
- Script run
- Any*
- Ket
- Ket
- End
-------------------------------------------------------------------
- paypаl.com A classic example of why script run checks are a good thing
- 0: payp
-
-/^(*sr:.*(*ACCEPT))/utf
- paypаl.com But *ACCEPT breaks things
- 0: payp\x{430}l.com But *ACCEPT breaks things
-
-/^(*sr:\x{2e80}*)/B,utf
-------------------------------------------------------------------
- Bra
- ^
- Script run
- \x{2e80}*+
- Ket
- Ket
- End
-------------------------------------------------------------------
-
-/^(*sr:\x{2e80}*)\x{2e80}/B,utf
-------------------------------------------------------------------
- Bra
- ^
- Script run
- \x{2e80}*
- Ket
- \x{2e80}
- Ket
- End
-------------------------------------------------------------------
-
-/(?<!)(*sr:)/B
-------------------------------------------------------------------
- Bra
- AssertB not
- Ket
- Script run
- Ket
- Ket
- End
-------------------------------------------------------------------
-
-/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
-------------------------------------------------------------------
- Bra
- AssertB
- Reverse
- abc
- Assert
- X
- Script run
- BXY
- Ket
- CCC
- Ket
- XBXYCCC
- Ket
- Any
- Ket
- End
-------------------------------------------------------------------
- abcXBXYCCC!
- 0: !
-
-# Some script run patterns are broken in Perl 5.28.0. These can be moved into
-# test 4 when a mended version of Perl is released.
-
-/^(*sr:.{4})/utf
- \x{0980}12\x{0993} Bengali Common-digits Bengali
- 0: \x{980}12\x{993}
- \x{0780}12\x{07b1} Thaana Common-digits Thaana
- 0: \x{780}12\x{7b1}
- \x{0e01}12\x{0e5b} Thai Common-digits Thai
- 0: \x{e01}12\x{e5b}
- \x{1780}12\x{19ff} Khmer Common-digits Khmer
- 0: \x{1780}12\x{19ff}
- \x{0904}12\x{0939} Devanagari Common-digits Devanagari
- 0: \x{904}12\x{939}
- A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
- 0: A\x{ff10}\x{ff19}B
- A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
- 0: A\x{1d7ce}\x{1d7cf}B
-
-# These ones involve non-ASCII but nevertheless Common digits. As of October
-# 2018 even blead Perl wasn't handling all of these - but is going to.
-
-/^(*sr:.{4})/utf
- A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
- 0: A\x{ff10}\x{ff19}B
- \x{ff10}\x{ff19}.. Common-notascii-digits Common Common
- 0: \x{ff10}\x{ff19}..
- A\x{ff10}BC Latin Common-notascii-digit Latin Latin
- 0: A\x{ff10}BC
- A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
- 0: A\x{1d7ce}\x{1d7cf}B
- \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
- 0: \x{1d7ce}\x{1d7cf},,
- A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
- 0: A\x{1d7ce}BC
-
-# -------
-
-# Test reference and errors in non-ASCII characters in group names
-
-/(?'ð‘ …ABC'...)/I,utf
-Capture group count = 1
-Named capture groups:
- ð‘ …ABC 1
-Options: utf
-Subject length lower bound = 3
- abcde\=copy=ð‘ …ABC
- 0: abc
- 1: abc
- C abc (3) ð‘ …ABC (group 1)
-
-# Bad ones
-
-/(?'ABáŒC'...)\g{ABáŒC}/utf
-Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
-
-/(?'Ù ABC'...)/utf
-Failed: error 144 at offset 3: subpattern name must start with a non-digit
-
-/(?'²ABC'...)/utf
-Failed: error 162 at offset 3: subpattern name expected
-
-/(?'X²ABC'...)/utf
-Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
-
-# -------
-
-/\p{Any}*xyz/I
-Capture group count = 0
-Compile options: <none>
-Overall options: anchored
-Last code unit = 'z'
-Subject length lower bound = 3
# End of testinput5
diff --git a/dist2/testdata/testoutput6 b/dist2/testdata/testoutput6
index 3ef10b40..caec833f 100644
--- a/dist2/testdata/testoutput6
+++ b/dist2/testdata/testoutput6
@@ -5978,7 +5978,7 @@ Partial match: 123
0: Content-Type:xxxyyyz
/^abc/Im,newline=lf
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Forced newline is LF
First code unit at start or follows newline
@@ -6001,7 +6001,7 @@ No match
No match
/^abc/Im,newline=crlf
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Forced newline is CRLF
First code unit at start or follows newline
@@ -6016,7 +6016,7 @@ No match
No match
/^abc/Im,newline=cr
-Capture group count = 0
+Capturing subpattern count = 0
Options: multiline
Forced newline is CR
First code unit at start or follows newline
@@ -6031,7 +6031,7 @@ No match
No match
/.*/I,newline=lf
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Forced newline is LF
First code unit at start or follows newline
@@ -6044,7 +6044,7 @@ Subject length lower bound = 0
0: abc\x0d
/.*/I,newline=cr
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Forced newline is CR
First code unit at start or follows newline
@@ -6057,7 +6057,7 @@ Subject length lower bound = 0
0: abc
/.*/I,newline=crlf
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Forced newline is CRLF
First code unit at start or follows newline
@@ -6070,7 +6070,7 @@ Subject length lower bound = 0
0: abc
/\w+(.)(.)?def/Is
-Capture group count = 2
+Capturing subpattern count = 2
Options: dotall
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
@@ -6447,7 +6447,7 @@ No match
0: \x0aA
/a\Rb/I,bsr=anycrlf
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -6465,7 +6465,7 @@ No match
No match
/a\Rb/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -6482,7 +6482,7 @@ Subject length lower bound = 3
0: a\x0bb
/a\R?b/I,bsr=anycrlf
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -6500,7 +6500,7 @@ No match
No match
/a\R?b/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -6517,7 +6517,7 @@ Subject length lower bound = 2
0: a\x0bb
/a\R{2,4}b/I,bsr=anycrlf
-Capture group count = 0
+Capturing subpattern count = 0
\R matches CR, LF, or CRLF
First code unit = 'a'
Last code unit = 'b'
@@ -6535,7 +6535,7 @@ No match
No match
/a\R{2,4}b/I,bsr=unicode
-Capture group count = 0
+Capturing subpattern count = 0
\R matches any Unicode newline
First code unit = 'a'
Last code unit = 'b'
@@ -6831,7 +6831,7 @@ Partial match: +ab
0+ CBA
/(abc|def|xyz)/I
-Capture group count = 1
+Capturing subpattern count = 1
Starting code units: a d x
Subject length lower bound = 3
terhjk;abcdaadsfe
@@ -6843,7 +6843,7 @@ Subject length lower bound = 3
No match
/(abc|def|xyz)/I,no_start_optimize
-Capture group count = 1
+Capturing subpattern count = 1
Options: no_start_optimize
Subject length lower bound = 0
terhjk;abcdaadsfe
@@ -7783,29 +7783,4 @@ No match
\na
No match
-/foobar/
- the foobar thing\=copy_matched_subject
- 0: foobar
- the foobar thing\=copy_matched_subject,zero_terminate
- 0: foobar
-
-/foobar/g
- the foobar thing foobar again\=copy_matched_subject
- 0: foobar
- 0: foobar
-
-/(?(VERSION>=0)^B0W)/
- B0W-W0W
- 0: B0W
-\= Expect no match
- 0
-No match
-
-/(?(VERSION>=1000)^B0W|W0W)/
- B0W-W0W
- 0: W0W
-\= Expect no match
- 0
-No match
-
# End of testinput6
diff --git a/dist2/testdata/testoutput7 b/dist2/testdata/testoutput7
index 004186e9..f8041851 100644
--- a/dist2/testdata/testoutput7
+++ b/dist2/testdata/testoutput7
@@ -1030,7 +1030,7 @@ No match
No match
/a\Rb/I,bsr=anycrlf,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches CR, LF, or CRLF
First code unit = 'a'
@@ -1049,7 +1049,7 @@ No match
No match
/a\Rb/I,bsr=unicode,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches any Unicode newline
First code unit = 'a'
@@ -1067,7 +1067,7 @@ Subject length lower bound = 3
0: a\x{0b}b
/a\R?b/I,bsr=anycrlf,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches CR, LF, or CRLF
First code unit = 'a'
@@ -1086,7 +1086,7 @@ No match
No match
/a\R?b/I,bsr=unicode,utf
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
\R matches any Unicode newline
First code unit = 'a'
diff --git a/dist2/testdata/testoutput8-16-2 b/dist2/testdata/testoutput8-16-2
index 5706e60f..47c9e56f 100644
--- a/dist2/testdata/testoutput8-16-2
+++ b/dist2/testdata/testoutput8-16-2
@@ -67,7 +67,7 @@ Memory allocation (code space): 10
2 2 Ket
4 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 14
4 4 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 26
10 10 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 22
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -404,7 +404,7 @@ Memory allocation (code space): 22
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -904,7 +904,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
79 79 Ket
81 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -938,7 +938,7 @@ Subject length lower bound = 0
43 43 Ket
45 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1011,7 +1011,7 @@ No match
133 133 Ket
135 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-16-3 b/dist2/testdata/testoutput8-16-3
index bd80a23f..722b0e1e 100644
--- a/dist2/testdata/testoutput8-16-3
+++ b/dist2/testdata/testoutput8-16-3
@@ -67,7 +67,7 @@ Memory allocation (code space): 14
3 3 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 18
5 5 Ket
8 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 30
11 11 Ket
14 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 26
9 9 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -404,7 +404,7 @@ Memory allocation (code space): 26
9 9 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
110 110 Ket
113 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
58 58 Ket
61 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
194 194 Ket
197 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-16-4 b/dist2/testdata/testoutput8-16-4
index bd80a23f..722b0e1e 100644
--- a/dist2/testdata/testoutput8-16-4
+++ b/dist2/testdata/testoutput8-16-4
@@ -67,7 +67,7 @@ Memory allocation (code space): 14
3 3 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 18
5 5 Ket
8 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 30
11 11 Ket
14 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 26
9 9 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -404,7 +404,7 @@ Memory allocation (code space): 26
9 9 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
110 110 Ket
113 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
58 58 Ket
61 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
194 194 Ket
197 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-32-2 b/dist2/testdata/testoutput8-32-2
index a86ef9a5..30667a30 100644
--- a/dist2/testdata/testoutput8-32-2
+++ b/dist2/testdata/testoutput8-32-2
@@ -67,7 +67,7 @@ Memory allocation (code space): 20
2 2 Ket
4 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 28
4 4 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 52
10 10 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 44
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -404,7 +404,7 @@ Memory allocation (code space): 44
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
79 79 Ket
81 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
43 43 Ket
45 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
133 133 Ket
135 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-32-3 b/dist2/testdata/testoutput8-32-3
index a86ef9a5..30667a30 100644
--- a/dist2/testdata/testoutput8-32-3
+++ b/dist2/testdata/testoutput8-32-3
@@ -67,7 +67,7 @@ Memory allocation (code space): 20
2 2 Ket
4 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 28
4 4 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 52
10 10 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 44
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -404,7 +404,7 @@ Memory allocation (code space): 44
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
79 79 Ket
81 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
43 43 Ket
45 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
133 133 Ket
135 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-32-4 b/dist2/testdata/testoutput8-32-4
index a86ef9a5..30667a30 100644
--- a/dist2/testdata/testoutput8-32-4
+++ b/dist2/testdata/testoutput8-32-4
@@ -67,7 +67,7 @@ Memory allocation (code space): 20
2 2 Ket
4 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 28
4 4 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 52
10 10 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 44
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
@@ -404,7 +404,7 @@ Memory allocation (code space): 44
8 8 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
79 79 Ket
81 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
43 43 Ket
45 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
133 133 Ket
135 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-8-2 b/dist2/testdata/testoutput8-8-2
index 33fd6e3e..4b03356e 100644
--- a/dist2/testdata/testoutput8-8-2
+++ b/dist2/testdata/testoutput8-8-2
@@ -67,7 +67,7 @@ Memory allocation (code space): 7
3 3 Ket
6 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 9
5 5 Ket
8 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 18
14 14 Ket
17 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 19
15 15 Ket
18 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xed
Last code unit = \xb4
@@ -404,7 +404,7 @@ Memory allocation (code space): 19
15 15 Ket
18 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xe6
Last code unit = \x9e
@@ -904,7 +904,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
119 119 Ket
122 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -938,7 +938,7 @@ Subject length lower bound = 0
61 61 Ket
64 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1011,7 +1011,7 @@ No match
205 205 Ket
208 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-8-3 b/dist2/testdata/testoutput8-8-3
index 59c69250..3d33c77d 100644
--- a/dist2/testdata/testoutput8-8-3
+++ b/dist2/testdata/testoutput8-8-3
@@ -67,7 +67,7 @@ Memory allocation (code space): 9
4 4 Ket
8 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 11
6 6 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 20
15 15 Ket
19 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 21
16 16 Ket
20 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xed
Last code unit = \xb4
@@ -404,7 +404,7 @@ Memory allocation (code space): 21
16 16 Ket
20 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xe6
Last code unit = \x9e
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
150 150 Ket
154 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
76 76 Ket
80 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
266 266 Ket
270 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput8-8-4 b/dist2/testdata/testoutput8-8-4
index 94808656..db049712 100644
--- a/dist2/testdata/testoutput8-8-4
+++ b/dist2/testdata/testoutput8-8-4
@@ -67,7 +67,7 @@ Memory allocation (code space): 11
5 5 Ket
10 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
May match empty string
Options: extended
Subject length lower bound = 0
@@ -80,7 +80,7 @@ Memory allocation (code space): 13
7 7 Ket
12 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: extended
First code unit = 'a'
Subject length lower bound = 1
@@ -376,7 +376,7 @@ Memory allocation (code space): 22
16 16 Ket
21 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
@@ -390,7 +390,7 @@ Memory allocation (code space): 23
17 17 Ket
22 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xed
Last code unit = \xb4
@@ -404,7 +404,7 @@ Memory allocation (code space): 23
17 17 Ket
22 End
------------------------------------------------------------------
-Capture group count = 0
+Capturing subpattern count = 0
Options: utf
First code unit = \xe6
Last code unit = \x9e
@@ -903,7 +903,7 @@ Failed: error 186 at offset 12820: regular expression is too complicated
181 181 Ket
186 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -937,7 +937,7 @@ Subject length lower bound = 0
91 91 Ket
96 End
------------------------------------------------------------------
-Capture group count = 1
+Capturing subpattern count = 1
Max back reference = 1
May match empty string
Subject length lower bound = 0
@@ -1010,7 +1010,7 @@ No match
327 327 Ket
332 End
------------------------------------------------------------------
-Capture group count = 10
+Capturing subpattern count = 10
May match empty string
Subject length lower bound = 0
diff --git a/dist2/testdata/testoutput9 b/dist2/testdata/testoutput9
index f98f2767..6b014e58 100644
--- a/dist2/testdata/testoutput9
+++ b/dist2/testdata/testoutput9
@@ -215,7 +215,7 @@ Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
-Capture group count = 0
+Capturing subpattern count = 0
Contains explicit CR or LF match
Options: extended
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
@@ -224,25 +224,25 @@ Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
Subject length lower bound = 3
/\h/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x09 \x20 \xa0
Subject length lower bound = 1
/\H/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/\v/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85
Subject length lower bound = 1
/\V/I
-Capture group count = 0
+Capturing subpattern count = 0
Subject length lower bound = 1
/\R/I
-Capture group count = 0
+Capturing subpattern count = 0
Starting code units: \x0a \x0b \x0c \x0d \x85
Subject length lower bound = 1
diff --git a/include_internal/config.h b/include_internal/config.h
index a699a139..450330c6 100644
--- a/include_internal/config.h
+++ b/include_internal/config.h
@@ -36,10 +36,6 @@ sure both macros are undefined; an emulation function will then be used. */
*/
/* #undef BSR_ANYCRLF */
-/* Define to any value to disable the use of the z and t modifiers in
- formatting settings such as %zu or %td (this is rarely needed). */
-/* #undef DISABLE_PERCENT_ZT */
-
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
@@ -202,7 +198,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.33"
+#define PACKAGE_STRING "PCRE2 10.32"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@@ -211,7 +207,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "10.33"
+#define PACKAGE_VERSION "10.32"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@@ -291,11 +287,6 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable callout script support in pcre2grep. */
#define SUPPORT_PCRE2GREP_CALLOUT /**/
-/* Define to any value to enable fork support in pcre2grep callout scripts.
- This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
- */
-#define SUPPORT_PCRE2GREP_CALLOUT_FORK /**/
-
/* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */
/* #undef SUPPORT_PCRE2GREP_JIT */
@@ -341,7 +332,7 @@ sure both macros are undefined; an emulation function will then be used. */
/* Version number of package */
-#define VERSION "10.33"
+#define VERSION "10.32"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
diff --git a/pcrecpp/include/pcre_scanner.h b/pcrecpp/include/pcre_scanner.h
new file mode 100644
index 00000000..b2bfabec
--- /dev/null
+++ b/pcrecpp/include/pcre_scanner.h
@@ -0,0 +1,173 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+//
+// Regular-expression based scanner for parsing an input stream.
+//
+// Example 1: parse a sequence of "var = number" entries from input:
+//
+// Scanner scanner(input);
+// string var;
+// int number;
+// scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
+// while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
+// ...;
+// }
+
+#ifndef _PCRE_SCANNER_H
+#define _PCRE_SCANNER_H
+
+#include <assert.h>
+#include <string>
+#include <vector>
+
+#include <pcrecpp.h>
+#include <pcre_stringpiece.h>
+
+namespace pcrecpp {
+
+class Scanner {
+ public:
+ Scanner();
+ explicit Scanner(const std::string& input);
+ ~Scanner();
+
+ // Return current line number. The returned line-number is
+ // one-based. I.e. it returns 1 + the number of consumed newlines.
+ //
+ // Note: this method may be slow. It may take time proportional to
+ // the size of the input.
+ int LineNumber() const;
+
+ // Return the byte offset in the input data at which the scanner is
+ // currently looking.
+ int Offset() const;
+
+ // Return true iff the start of the remaining input matches "re"
+ bool LookingAt(const RE& re) const;
+
+ // Return true iff all of the following are true
+ // a. the start of the remaining input matches "re",
+ // b. if any arguments are supplied, matched sub-patterns can be
+ // parsed and stored into the arguments.
+ // If it returns true, it skips over the matched input and any
+ // following input that matches the "skip" regular expression.
+ template<typename ... ARGS>
+ bool Consume(const RE& re, ARGS && ... args) {
+ const bool result = re.Consume(&input_, args...);
+ if (result && should_skip_)
+ ConsumeSkip();
+ return result;
+ }
+
+ // Set the "skip" regular expression. If after consuming some data,
+ // a prefix of the input matches this RE, it is automatically
+ // skipped. For example, a programming language scanner would use
+ // a skip RE that matches white space and comments.
+ //
+ // scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
+ //
+ // Skipping repeats as long as it succeeds. We used to let people do
+ // this by writing "(...)*" in the regular expression, but that added
+ // up to lots of recursive calls within the pcre library, so now we
+ // control repetition explicitly via the function call API.
+ //
+ // You can pass NULL for "re" if you do not want any data to be skipped.
+ void Skip(const char* re); // DEPRECATED; does *not* repeat
+ void SetSkipExpression(const char* re);
+
+ // Temporarily pause "skip"ing. This
+ // Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
+ // is similar to
+ // Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
+ // but avoids creating/deleting new RE objects.
+ void DisableSkip();
+
+ // Reenable previously paused skipping. Any prefix of the input
+ // that matches the skip pattern is immediately dropped.
+ void EnableSkip();
+
+ /***** Special wrappers around SetSkipExpression() for some common idioms *****/
+
+ // Arranges to skip whitespace, C comments, C++ comments.
+ // The overall RE is a disjunction of the following REs:
+ // \\s whitespace
+ // //.*\n C++ comment
+ // /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
+ // We get repetition via the semantics of SetSkipExpression, not by using *
+ void SkipCXXComments() {
+ SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
+ }
+
+ void set_save_comments(bool comments) {
+ save_comments_ = comments;
+ }
+
+ bool save_comments() {
+ return save_comments_;
+ }
+
+ // Append to vector ranges the comments found in the
+ // byte range [start,end] (inclusive) of the input data.
+ // Only comments that were extracted entirely within that
+ // range are returned: no range splitting of atomically-extracted
+ // comments is performed.
+ void GetComments(int start, int end, std::vector<StringPiece> *ranges);
+
+ // Append to vector ranges the comments added
+ // since the last time this was called. This
+ // functionality is provided for efficiency when
+ // interleaving scanning with parsing.
+ void GetNextComments(std::vector<StringPiece> *ranges);
+
+ private:
+ std::string data_; // All the input data
+ StringPiece input_; // Unprocessed input
+ RE* skip_; // If non-NULL, RE for skipping input
+ bool should_skip_; // If true, use skip_
+ bool skip_repeat_; // If true, repeat skip_ as long as it works
+ bool save_comments_; // If true, aggregate the skip expression
+
+ // the skipped comments
+ // TODO: later consider requiring that the StringPieces be added
+ // in order by their start position
+ std::vector<StringPiece> *comments_;
+
+ // the offset into comments_ that has been returned by GetNextComments
+ int comments_offset_;
+
+ // helper function to consume *skip_ and honour
+ // save_comments_
+ void ConsumeSkip();
+};
+
+} // namespace pcrecpp
+
+#endif /* _PCRE_SCANNER_H */
diff --git a/pcrecpp/include/pcre_stringpiece.h b/pcrecpp/include/pcre_stringpiece.h
new file mode 100644
index 00000000..51b9812b
--- /dev/null
+++ b/pcrecpp/include/pcre_stringpiece.h
@@ -0,0 +1,180 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+//
+// A string like object that points into another piece of memory.
+// Useful for providing an interface that allows clients to easily
+// pass in either a "const char*" or a "string".
+//
+// Arghh! I wish C++ literals were automatically of type "string".
+
+#ifndef _PCRE_STRINGPIECE_H
+#define _PCRE_STRINGPIECE_H
+
+#include <cstring>
+#include <string>
+#include <iosfwd> // for ostream forward-declaration
+
+#if 0
+#define HAVE_TYPE_TRAITS
+#include <type_traits.h>
+#elif 0
+#define HAVE_TYPE_TRAITS
+#include <bits/type_traits.h>
+#endif
+
+#include <pcre2.h>
+
+using std::memcmp;
+using std::strlen;
+using std::string;
+
+namespace pcrecpp {
+
+class StringPiece {
+ private:
+ const char* ptr_;
+ int length_;
+
+ public:
+ // We provide non-explicit singleton constructors so users can pass
+ // in a "const char*" or a "string" wherever a "StringPiece" is
+ // expected.
+ StringPiece()
+ : ptr_(NULL), length_(0) { }
+ StringPiece(const char* str)
+ : ptr_(str), length_(static_cast<int>(strlen(ptr_))) { }
+ StringPiece(const unsigned char* str)
+ : ptr_(reinterpret_cast<const char*>(str)),
+ length_(static_cast<int>(strlen(ptr_))) { }
+ StringPiece(const string& str)
+ : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
+ StringPiece(const char* offset, int len)
+ : ptr_(offset), length_(len) { }
+
+ // data() may return a pointer to a buffer with embedded NULs, and the
+ // returned buffer may or may not be null terminated. Therefore it is
+ // typically a mistake to pass data() to a routine that expects a NUL
+ // terminated string. Use "as_string().c_str()" if you really need to do
+ // this. Or better yet, change your routine so it does not rely on NUL
+ // termination.
+ const char* data() const { return ptr_; }
+ int size() const { return length_; }
+ bool empty() const { return length_ == 0; }
+
+ void clear() { ptr_ = NULL; length_ = 0; }
+ void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; }
+ void set(const char* str) {
+ ptr_ = str;
+ length_ = static_cast<int>(strlen(str));
+ }
+ void set(const void* buffer, int len) {
+ ptr_ = reinterpret_cast<const char*>(buffer);
+ length_ = len;
+ }
+
+ char operator[](int i) const { return ptr_[i]; }
+
+ void remove_prefix(int n) {
+ ptr_ += n;
+ length_ -= n;
+ }
+
+ void remove_suffix(int n) {
+ length_ -= n;
+ }
+
+ bool operator==(const StringPiece& x) const {
+ return ((length_ == x.length_) &&
+ (memcmp(ptr_, x.ptr_, length_) == 0));
+ }
+ bool operator!=(const StringPiece& x) const {
+ return !(*this == x);
+ }
+
+#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp) \
+ bool operator cmp (const StringPiece& x) const { \
+ int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); \
+ return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_))); \
+ }
+ STRINGPIECE_BINARY_PREDICATE(<, <);
+ STRINGPIECE_BINARY_PREDICATE(<=, <);
+ STRINGPIECE_BINARY_PREDICATE(>=, >);
+ STRINGPIECE_BINARY_PREDICATE(>, >);
+#undef STRINGPIECE_BINARY_PREDICATE
+
+ int compare(const StringPiece& x) const {
+ int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_);
+ if (r == 0) {
+ if (length_ < x.length_) r = -1;
+ else if (length_ > x.length_) r = +1;
+ }
+ return r;
+ }
+
+ string as_string() const {
+ return string(data(), size());
+ }
+
+ void CopyToString(string* target) const {
+ target->assign(ptr_, length_);
+ }
+
+ // Does "this" start with "x"
+ bool starts_with(const StringPiece& x) const {
+ return ((length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0));
+ }
+};
+
+} // namespace pcrecpp
+
+// ------------------------------------------------------------------
+// Functions used to create STL containers that use StringPiece
+// Remember that a StringPiece's lifetime had better be less than
+// that of the underlying string or char*. If it is not, then you
+// cannot safely store a StringPiece into an STL container
+// ------------------------------------------------------------------
+
+#ifdef HAVE_TYPE_TRAITS
+// This makes vector<StringPiece> really fast for some STL implementations
+template<> struct __type_traits<pcrecpp::StringPiece> {
+ typedef __true_type has_trivial_default_constructor;
+ typedef __true_type has_trivial_copy_constructor;
+ typedef __true_type has_trivial_assignment_operator;
+ typedef __true_type has_trivial_destructor;
+ typedef __true_type is_POD_type;
+};
+#endif
+
+// allow StringPiece to be logged
+extern std::ostream& operator<<(std::ostream& o,
+ const pcrecpp::StringPiece& piece);
+
+#endif /* _PCRE_STRINGPIECE_H */
diff --git a/pcrecpp/include/pcrecpp.h b/pcrecpp/include/pcrecpp.h
new file mode 100644
index 00000000..0c216b2a
--- /dev/null
+++ b/pcrecpp/include/pcrecpp.h
@@ -0,0 +1,697 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
+
+#ifndef _PCRECPP_H
+#define _PCRECPP_H
+
+// C++ interface to the pcre regular-expression library. RE supports
+// Perl-style regular expressions (with extensions like \d, \w, \s,
+// ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module is part of the pcre library and hence supports its syntax
+// for regular expressions.
+//
+// The syntax is pretty similar to Perl's. For those not familiar
+// with Perl's regular expressions, here are some examples of the most
+// commonly used extensions:
+//
+// "hello (\\w+) world" -- \w matches a "word" character
+// "version (\\d+)" -- \d matches a digit
+// "hello\\s+world" -- \s matches any whitespace character
+// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
+// "(?i)hello" -- (?i) turns on case-insensitive matching
+// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
+//
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+// pcrecpp::RE re("h.*o");
+// re.FullMatch("hello");
+//
+// Example: unsuccessful match (requires full match):
+// pcrecpp::RE re("e");
+// !re.FullMatch("hello");
+//
+// Example: creating a temporary RE object:
+// pcrecpp::RE("h.*o").FullMatch("hello");
+//
+// You can pass in a "const char*" or a "string" for "text". The
+// examples below tend to use a const char*.
+//
+// You can, as in the different examples above, store the RE object
+// explicitly in a variable or use a temporary RE object. The
+// examples below use one mode or the other arbitrarily. Either
+// could correctly be used for any of these examples.
+//
+// -----------------------------------------------------------------------
+// MATCHING WITH SUB-STRING EXTRACTION:
+//
+// You can supply extra pointer arguments to extract matched subpieces.
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+// int i;
+// string s;
+// pcrecpp::RE re("(\\w+):(\\d+)");
+// re.FullMatch("ruby:1234", &s, &i);
+//
+// Example: does not try to extract any extra sub-patterns
+// re.FullMatch("ruby:1234", &s);
+//
+// Example: does not try to extract into NULL
+// re.FullMatch("ruby:1234", NULL, &i);
+//
+// Example: integer overflow causes failure
+// !re.FullMatch("ruby:1234567891234", NULL, &i);
+//
+// Example: fails because there aren't enough sub-patterns:
+// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
+//
+// Example: fails because string cannot be stored in integer
+// !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
+//
+// The provided pointer arguments can be pointers to any scalar numeric
+// type, or one of
+// string (matched piece is copied to string)
+// StringPiece (StringPiece is mutated to point to matched piece)
+// T (where "bool T::ParseFrom(const char*, int)" exists)
+// NULL (the corresponding matched sub-pattern is not copied)
+//
+// CAVEAT: An optional sub-pattern that does not exist in the matched
+// string is assigned the empty string. Therefore, the following will
+// return false (because the empty string is not a valid number):
+// int number;
+// pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
+//
+// -----------------------------------------------------------------------
+// DO_MATCH
+//
+// The matching interface supports at most 16 arguments per call.
+// If you need more, consider using the more general interface
+// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch.
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+// pcrecpp::RE("ell").PartialMatch("hello");
+//
+// Example: find first number in a string:
+// int number;
+// pcrecpp::RE re("(\\d+)");
+// re.PartialMatch("x*100 + 20", &number);
+// assert(number == 100);
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, pattern and text are plain text, one byte per character.
+// The UTF8 flag, passed to the constructor, causes both pattern
+// and string to be treated as UTF-8 text, still a byte stream but
+// potentially multiple bytes per character. In practice, the text
+// is likelier to be UTF-8 than the pattern, but the match returned
+// may depend on the UTF8 flag, so always use it when matching
+// UTF8 text. E.g., "." will match one byte normally but with UTF8
+// set may match up to four bytes of a multi-byte character.
+//
+// Example:
+// pcrecpp::RE_Options options;
+// options.set_utf8();
+// pcrecpp::RE re(utf8_pattern, options);
+// re.FullMatch(utf8_string);
+//
+// Example: using the convenience function UTF8():
+// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
+// re.FullMatch(utf8_string);
+//
+// NOTE: The UTF8 option is ignored if pcre was not configured with the
+// --enable-utf8 flag.
+//
+// -----------------------------------------------------------------------
+// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
+//
+// PCRE defines some modifiers to change the behavior of the regular
+// expression engine.
+// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
+// to pass such modifiers to a RE class.
+//
+// Currently, the following modifiers are supported
+//
+// modifier description Perl corresponding
+//
+// PCRE_CASELESS case insensitive match /i
+// PCRE_MULTILINE multiple lines match /m
+// PCRE_DOTALL dot matches newlines /s
+// PCRE_DOLLAR_ENDONLY $ matches only at end N/A
+// PCRE_EXTRA strict escape parsing N/A
+// PCRE_EXTENDED ignore whitespaces /x
+// PCRE_UTF8 handles UTF8 chars built-in
+// PCRE_UNGREEDY reverses * and *? N/A
+// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*)
+//
+// (For a full account on how each modifier works, please check the
+// PCRE API reference manual).
+//
+// (*) Both Perl and PCRE allow non-capturing parentheses by means of the
+// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
+// capture, while (ab|cd) does.
+//
+// For each modifier, there are two member functions whose name is made
+// out of the modifier in lowercase, without the "PCRE_" prefix. For
+// instance, PCRE_CASELESS is handled by
+// bool caseless(),
+// which returns true if the modifier is set, and
+// RE_Options & set_caseless(bool),
+// which sets or unsets the modifier.
+// (The UTF modifier is the exception: its member functions are named
+// utf() and set_utf(bool).)
+//
+// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
+// set_match_limit() and match_limit() member functions.
+// Setting match_limit to a non-zero value will limit the execution of
+// pcre to keep it from doing bad things like blowing the stack or taking
+// an eternity to return a result. A value of 5000 is good enough to stop
+// stack blowup in a 2MB thread stack. Setting match_limit to zero will
+// disable match limiting. Alternately, you can set match_limit_recursion()
+// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
+// recurses. match_limit() caps the number of matches pcre does;
+// match_limit_recursion() caps the depth of recursion.
+//
+// Normally, to pass one or more modifiers to a RE class, you declare
+// a RE_Options object, set the appropriate options, and pass this
+// object to a RE constructor. Example:
+//
+// RE_Options opt;
+// opt.set_caseless(true);
+//
+// if (RE("HELLO", opt).PartialMatch("hello world")) ...
+//
+// RE_Options has two constructors. The default constructor takes no
+// arguments and creates a set of flags that are off by default.
+//
+// The optional parameter 'option_flags' is to facilitate transfer
+// of legacy code from C programs. This lets you do
+// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
+//
+// But new code is better off doing
+// RE(pattern,
+// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
+// (See below)
+//
+// If you are going to pass one of the most used modifiers, there are some
+// convenience functions that return a RE_Options class with the
+// appropriate modifier already set:
+// CASELESS(), UTF(), MULTILINE(), DOTALL(), EXTENDED()
+//
+// If you need to set several options at once, and you don't want to go
+// through the pains of declaring a RE_Options object and setting several
+// options, there is a parallel method that gives you such ability on the
+// fly. You can concatenate several set_xxxxx member functions, since each
+// of them returns a reference to its class object. e.g.: to pass
+// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
+// statement, you may write
+//
+// RE(" ^ xyz \\s+ .* blah$", RE_Options()
+// .set_caseless(true)
+// .set_extended(true)
+// .set_multiline(true)).PartialMatch(sometext);
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match. This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string. Like RE, StringPiece
+// is defined in the pcrecpp namespace.
+//
+// Example: read lines of the form "var = value" from a string.
+// string contents = ...; // Fill string somehow
+// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
+//
+// string var;
+// int value;
+// pcrecpp::RE re("(\\w+) = (\\d+)\n");
+// while (re.Consume(&input, &var, &value)) {
+// ...;
+// }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string. For example, you
+// could extract all words from a string by repeatedly calling
+// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number. You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base. The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+// int a, b, c, d;
+// pcrecpp::RE re("(.*) (.*) (.*) (.*)");
+// re.FullMatch("100 40 0100 0x40",
+// pcrecpp::Octal(&a), pcrecpp::Hex(&b),
+// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
+// will leave 64 in a, b, c, and d.
+//
+// -----------------------------------------------------------------------
+// REPLACING PARTS OF STRINGS
+//
+// You can replace the first match of "pattern" in "str" with
+// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9)
+// can be used to insert text matching corresponding parenthesized
+// group from the pattern. \0 in "rewrite" refers to the entire
+// matching text. E.g.,
+//
+// string s = "yabba dabba doo";
+// pcrecpp::RE("b+").Replace("d", &s);
+//
+// will leave "s" containing "yada dabba doo". The result is true if
+// the pattern matches and a replacement occurs, or false otherwise.
+//
+// GlobalReplace() is like Replace(), except that it replaces all
+// occurrences of the pattern in the string with the rewrite.
+// Replacements are not subject to re-matching. E.g.,
+//
+// string s = "yabba dabba doo";
+// pcrecpp::RE("b+").GlobalReplace("d", &s);
+//
+// will leave "s" containing "yada dada doo". It returns the number
+// of replacements made.
+//
+// Extract() is like Replace(), except that if the pattern matches,
+// "rewrite" is copied into "out" (an additional argument) with
+// substitutions. The non-matching portions of "text" are ignored.
+// Returns true iff a match occurred and the extraction happened
+// successfully. If no match occurs, the string is left unaffected.
+
+
+#include <string>
+#include <pcre2.h>
+#include <pcrecpparg.h> // defines the Arg class
+// This isn't technically needed here, but we include it
+// anyway so folks who include pcrecpp.h don't have to.
+#include <pcre_stringpiece.h>
+#include <memory>
+
+namespace pcrecpp {
+
+// Body of an RE_Options setter: sets (b true) or clears (b false) the bits
+// "o" in all_options_, then returns *this. It expands to several statements
+// ending in "return", so it must form the entire body of a member function.
+#define PCRE_SET_OR_CLEAR(b, o) \
+    if (b) all_options_ |= (o); else all_options_ &= ~(o); \
+    return *this
+
+// True iff every bit of "o" is set in all_options_. The replacement list and
+// the argument are parenthesized so the macro is safe in larger expressions.
+#define PCRE_IS_SET(o) \
+    ((all_options_ & (o)) == (o))
+
+typedef std::shared_ptr<pcre2_match_data> pcre2_match_data_ptr;
+
+/***** Compiling regular expressions: the RE class *****/
+
+// RE_Options allow you to set options to be passed along to pcre,
+// along with other options we put on top of pcre.
+// Eight flag modifiers, the newline mode, match_limit and
+// match_limit_recursion are supported now.
+class RE_Options {
+ public:
+  // Default constructor: all flags off, limits and newline mode zero.
+  RE_Options()
+    : newline_mode_(0),
+      match_limit_(0),
+      match_limit_recursion_(0),
+      all_options_(0) {
+  }
+
+  // Alternative constructor, to facilitate transfer of legacy code from
+  // C programs: "option_flags" becomes the initial option bit mask.
+  //
+  // This lets you do
+  //    RE(pattern, RE_Options(PCRE2_CASELESS|PCRE2_MULTILINE)).PartialMatch(str);
+  // But new code is better off doing
+  //    RE(pattern,
+  //       RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
+  RE_Options(int option_flags)
+    : newline_mode_(0),
+      match_limit_(0),
+      match_limit_recursion_(0),
+      all_options_(option_flags) {
+  }
+  // we're fine with the default destructor, copy constructor, etc.
+
+  // accessors and mutators; every setter returns *this so calls can chain
+  int match_limit() const { return match_limit_; }
+  RE_Options &set_match_limit(int limit) {
+    match_limit_ = limit;
+    return *this;
+  }
+
+  int match_limit_recursion() const { return match_limit_recursion_; }
+  RE_Options &set_match_limit_recursion(int limit) {
+    match_limit_recursion_ = limit;
+    return *this;
+  }
+
+  bool caseless() const {
+    return PCRE_IS_SET(PCRE2_CASELESS);
+  }
+  RE_Options &set_caseless(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_CASELESS);
+  }
+
+  bool multiline() const {
+    return PCRE_IS_SET(PCRE2_MULTILINE);
+  }
+  RE_Options &set_multiline(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_MULTILINE);
+  }
+
+  // Returns the configured newline mode; if none was set (0), returns the
+  // global configuration default reported by pcre2_config_8().
+  int newline_mode() const {
+    if (newline_mode_)
+      return newline_mode_;
+    int value = 0;  // stays 0 should the query not fill it in
+    pcre2_config_8(PCRE2_CONFIG_NEWLINE, &value);
+    return value;
+  }
+  RE_Options & set_newline_mode(int newline_mode) {
+    newline_mode_ = newline_mode;
+    return *this;
+  }
+
+  bool dotall() const {
+    return PCRE_IS_SET(PCRE2_DOTALL);
+  }
+  RE_Options &set_dotall(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_DOTALL);
+  }
+
+  bool extended() const {
+    return PCRE_IS_SET(PCRE2_EXTENDED);
+  }
+  RE_Options &set_extended(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_EXTENDED);
+  }
+
+  bool dollar_endonly() const {
+    return PCRE_IS_SET(PCRE2_DOLLAR_ENDONLY);
+  }
+  RE_Options &set_dollar_endonly(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_DOLLAR_ENDONLY);
+  }
+
+  bool ungreedy() const {
+    return PCRE_IS_SET(PCRE2_UNGREEDY);
+  }
+  RE_Options &set_ungreedy(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_UNGREEDY);
+  }
+
+  bool utf() const {
+    return PCRE_IS_SET(PCRE2_UTF);
+  }
+  RE_Options &set_utf(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_UTF);
+  }
+
+  bool no_auto_capture() const {
+    return PCRE_IS_SET(PCRE2_NO_AUTO_CAPTURE);
+  }
+  RE_Options &set_no_auto_capture(bool x) {
+    PCRE_SET_OR_CLEAR(x, PCRE2_NO_AUTO_CAPTURE);
+  }
+
+  // Replaces the whole option bit mask at once.
+  RE_Options &set_all_options(int opt) {
+    all_options_ = opt;
+    return *this;
+  }
+  int all_options() const {
+    return all_options_;
+  }
+
+  // TODO: add other pcre flags
+
+ private:
+  int newline_mode_;            // 0 means "use the library default"
+  int match_limit_;
+  int match_limit_recursion_;
+  int all_options_;             // bit mask of PCRE2_* option flags
+};
+
+// Convenience factories: each one hands back a fresh RE_Options in which
+// only the modifier named by the function has been switched on.
+static inline RE_Options UTF() {
+  RE_Options opts;
+  opts.set_utf(true);
+  return opts;
+}
+
+static inline RE_Options CASELESS() {
+  RE_Options opts;
+  opts.set_caseless(true);
+  return opts;
+}
+
+static inline RE_Options MULTILINE() {
+  RE_Options opts;
+  opts.set_multiline(true);
+  return opts;
+}
+
+static inline RE_Options DOTALL() {
+  RE_Options opts;
+  opts.set_dotall(true);
+  return opts;
+}
+
+static inline RE_Options EXTENDED() {
+  RE_Options opts;
+  opts.set_extended(true);
+  return opts;
+}
+
+// Interface for regular expression matching. Also corresponds to a
+// pre-compiled regular expression. An "RE" object is safe for
+// concurrent use by multiple threads.
+class RE {
+ public:
+  // We provide implicit conversions from strings so that users can
+  // pass in a string or a "const char*" wherever an "RE" is expected.
+  // The overloads without an RE_Options argument pass NULL to Init().
+  RE(const string& pat) { Init(pat, NULL); }
+  RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
+  RE(const char* pat) { Init(pat, NULL); }
+  RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
+  RE(const unsigned char* pat) {
+    Init(reinterpret_cast<const char*>(pat), NULL);
+  }
+  RE(const unsigned char* pat, const RE_Options& option) {
+    Init(reinterpret_cast<const char*>(pat), &option);
+  }
+
+  // Copy constructor & assignment - note that these are expensive
+  // because they recompile the expression.
+  RE(const RE& re) { Init(re.pattern_, &re.options_); }
+  const RE& operator=(const RE& re) {
+    if (this != &re) {
+      Cleanup();
+
+      // This is the code that originally came from Google
+      // Init(re.pattern_.c_str(), &re.options_);
+
+      // This is the replacement from Ari Pollak
+      Init(re.pattern_, &re.options_);
+    }
+    return *this;
+  }
+
+
+  ~RE();
+
+  // The string specification for this RE. E.g.
+  //   RE re("ab*c?d+");
+  //   re.pattern();    // "ab*c?d+"
+  const string& pattern() const { return pattern_; }
+
+  // If RE could not be created properly, returns an error string.
+  // Else returns the empty string.
+  const string& error() const { return error_; }
+
+  /***** The useful part: the matching interface *****/
+
+  // In each of the four matching templates below, every trailing argument
+  // is converted to an Arg (see Args<>::arrayify) and the resulting array
+  // is handed to DoMatchImpl(). The array always has at least one slot:
+  // these functions may be called without trailing arguments, and a
+  // zero-length array is not valid C++.
+
+  // Matches "text" with the pattern anchored at both ends (ANCHOR_BOTH).
+  template<typename ... ARGS>
+  bool FullMatch(const StringPiece & text, ARGS && ...a) const {
+    constexpr unsigned kArgCount = Args<ARGS...>::count();
+    Arg args[kArgCount ? kArgCount : 1];
+    Args<ARGS...>::arrayify(args, a...);
+
+    return DoMatchImpl(text, ANCHOR_BOTH, NULL, args, kArgCount);
+  }
+
+  // Matches "text" without anchoring the pattern (UNANCHORED).
+  template<typename ... ARGS>
+  bool PartialMatch(const StringPiece& text, ARGS && ...a) const {
+    constexpr unsigned kArgCount = Args<ARGS...>::count();
+    Arg args[kArgCount ? kArgCount : 1];
+    Args<ARGS...>::arrayify(args, a...);
+
+    return DoMatchImpl(text, UNANCHORED, NULL, args, kArgCount);
+  }
+
+  // Matches the pattern at the start of "*input" (ANCHOR_START). On
+  // success, the number of bytes reported by DoMatchImpl() is removed
+  // from the front of "*input"; on failure "*input" is left untouched.
+  template<typename ... ARGS>
+  bool Consume(StringPiece* input, ARGS && ...a) const {
+    constexpr unsigned kArgCount = Args<ARGS...>::count();
+    Arg args[kArgCount ? kArgCount : 1];
+    Args<ARGS...>::arrayify(args, a...);
+
+    int consumed;
+    if (DoMatchImpl(*input, ANCHOR_START, &consumed, args, kArgCount)) {
+      input->remove_prefix(consumed);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Like Consume(), but the match is not anchored (UNANCHORED).
+  template<typename ... ARGS>
+  bool FindAndConsume(StringPiece* input, ARGS && ...a) const {
+    constexpr unsigned kArgCount = Args<ARGS...>::count();
+    Arg args[kArgCount ? kArgCount : 1];
+    Args<ARGS...>::arrayify(args, a...);
+
+    int consumed;
+    if (DoMatchImpl(*input, UNANCHORED, &consumed, args, kArgCount)) {
+      input->remove_prefix(consumed);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  bool Replace(const StringPiece& rewrite,
+               string *str) const;
+
+  int GlobalReplace(const StringPiece& rewrite,
+                    string *str) const;
+
+  bool Extract(const StringPiece &rewrite,
+               const StringPiece &text,
+               string *out) const;
+
+  // Escapes all potentially meaningful regexp characters in
+  // 'unquoted'. The returned string, used as a regular expression,
+  // will exactly match the original string. For example,
+  //           1.5-2.0?
+  // may become:
+  //           1\.5\-2\.0\?
+  // Note QuoteMeta behaves the same as perl's QuoteMeta function,
+  // *except* that it escapes the NUL character (\0) as backslash + 0,
+  // rather than backslash + NUL.
+  static string QuoteMeta(const StringPiece& unquoted);
+
+
+  /***** Generic matching interface *****/
+
+  // Type of match (TODO: Should be restructured as part of RE_Options)
+  enum Anchor {
+    UNANCHORED,         // No anchoring
+    ANCHOR_START,       // Anchor at start only
+    ANCHOR_BOTH         // Anchor at start and end
+  };
+
+  // General matching routine. Stores the length of the match in
+  // "*consumed" if successful.
+  bool DoMatch(const StringPiece& text,
+               Anchor anchor,
+               int* consumed,
+               Arg const argsp[], int n) const;
+
+  // Return the number of capturing subpatterns, or -1 if the
+  // regexp wasn't valid on construction.
+  int NumberOfCapturingGroups() const;
+
+ private:
+
+  void Init(const string& pattern, const RE_Options* options);
+  void Cleanup();
+
+  // Match against "text", filling in "match_data" with the beginning
+  // and end positions of matched text. The first pair corresponds to
+  // the entire matched text; subsequent pairs correspond, in order, to
+  // parentheses-captured matches. Returns the number of pairs (one more
+  // than the number of the last subpattern with a match) if matching was
+  // successful and zero if the match failed.
+  // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
+  // against "foo", "bar", and "baz" respectively.
+  // When matching RE("(foo)|hello") against "hello", it will return 1.
+  // But the values for all subpatterns are filled in into "match_data".
+  // NOTE(review): wording carried over from the int-vector based version;
+  // confirm against the TryMatch() implementation.
+  int TryMatch(const StringPiece& text,
+               int startpos,
+               Anchor anchor,
+               bool empty_ok,
+               pcre2_match_data_ptr & match_data) const;
+
+  // Append the "rewrite" string, with backslash substitutions from "text"
+  // and "match_data", to string "out".
+  bool Rewrite(string *out,
+               const StringPiece& rewrite,
+               const StringPiece& text,
+               pcre2_match_data_ptr const & match_data) const;
+
+  // internal implementation for DoMatch
+  bool DoMatchImpl(const StringPiece& text,
+                   Anchor anchor,
+                   int* consumed,
+                   const Arg args[],
+                   int n) const;
+
+  // Compile the regexp for the specified anchoring mode
+  pcre2_code * Compile(Anchor anchor);
+
+  string pattern_;
+  RE_Options options_;
+  pcre2_code* re_full_;       // For full matches
+  pcre2_code* re_partial_;    // For partial matches
+  string error_;              // Error indicator
+};
+
+} // namespace pcrecpp
+
+#endif /* _PCRECPP_H */
diff --git a/pcrecpp/include/pcrecpparg.h b/pcrecpp/include/pcrecpparg.h
new file mode 100644
index 00000000..0a1713f4
--- /dev/null
+++ b/pcrecpp/include/pcrecpparg.h
@@ -0,0 +1,208 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+
+#ifndef _PCRECPPARG_H
+#define _PCRECPPARG_H
+
+#include <stdlib.h> // for NULL
+#include <string>
+
+#include <pcre2.h>
+
+namespace pcrecpp {
+
+class StringPiece;
+
+// Hex/Octal/Binary?
+
+// Adapter for parsing into user-defined objects: forwards the matched
+// text to T::ParseFrom(str, n). A NULL destination counts as success.
+template <class T>
+class _RE_MatchObject {
+ public:
+  static inline bool Parse(const char* str, int n, void* dest) {
+    if (dest != NULL) {
+      return static_cast<T*>(dest)->ParseFrom(str, n);
+    }
+    return true;
+  }
+};
+
+class Arg {  // pairs a destination pointer (arg_) with the parser function (parser_) that fills it
+ public:
+ // Empty constructor so we can declare arrays of Arg (NULL destination, parse_null parser)
+ Arg();
+
+ // Constructor specially designed for NULL arguments: stores the pointer and uses parse_null
+ Arg(void*);
+
+ typedef bool (*Parser)(const char* str, int n, void* dest);  // signature shared by all parsers
+
+// Type-specific parsers: for each listed type the macro emits Arg(type*) using the named parser and Arg(type*, Parser) using a caller-supplied one
+#define PCRE_MAKE_PARSER(type,name) \
+ Arg(type* p) : arg_(p), parser_(name) { } \
+ Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }
+
+
+ PCRE_MAKE_PARSER(char, parse_char);
+ PCRE_MAKE_PARSER(unsigned char, parse_uchar);
+ PCRE_MAKE_PARSER(short, parse_short);
+ PCRE_MAKE_PARSER(unsigned short, parse_ushort);
+ PCRE_MAKE_PARSER(int, parse_int);
+ PCRE_MAKE_PARSER(unsigned int, parse_uint);
+ PCRE_MAKE_PARSER(long, parse_long);
+ PCRE_MAKE_PARSER(unsigned long, parse_ulong);
+#if 1
+ PCRE_MAKE_PARSER(long long, parse_longlong);
+#endif
+#if 1
+ PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
+#endif
+ PCRE_MAKE_PARSER(float, parse_float);
+ PCRE_MAKE_PARSER(double, parse_double);
+ PCRE_MAKE_PARSER(std::string, parse_string);
+ PCRE_MAKE_PARSER(StringPiece, parse_stringpiece);
+
+#undef PCRE_MAKE_PARSER
+
+ // Generic constructor taking an explicit parser (declared here; no definition appears in this header)
+ template <class T> Arg(T*, Parser parser);
+ // Generic constructor template: any other pointee type is parsed through _RE_MatchObject<T>::Parse
+ template <class T> Arg(T* p)
+ : arg_(p), parser_(_RE_MatchObject<T>::Parse) {
+ }
+
+ // Parse the data: invokes the stored parser on the n bytes at str with the stored destination
+ bool Parse(const char* str, int n) const;
+
+ private:
+ void* arg_;  // destination handed to the parser; may be NULL
+ Parser parser_;  // function called by Parse()
+
+ static bool parse_null (const char* str, int n, void* dest);
+ static bool parse_char (const char* str, int n, void* dest);
+ static bool parse_uchar (const char* str, int n, void* dest);
+ static bool parse_float (const char* str, int n, void* dest);
+ static bool parse_double (const char* str, int n, void* dest);
+ static bool parse_string (const char* str, int n, void* dest);
+ static bool parse_stringpiece (const char* str, int n, void* dest);
+
+#define PCRE_DECLARE_INTEGER_PARSER(name) \
+ private: \
+ static bool parse_ ## name(const char* str, int n, void* dest); \
+ static bool parse_ ## name ## _radix( \
+ const char* str, int n, void* dest, int radix); \
+ public: \
+ static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
+ static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
+ static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
+
+ PCRE_DECLARE_INTEGER_PARSER(short);  // each use declares the private plain/_radix parsers and the public _hex/_octal/_cradix parsers
+ PCRE_DECLARE_INTEGER_PARSER(ushort);
+ PCRE_DECLARE_INTEGER_PARSER(int);
+ PCRE_DECLARE_INTEGER_PARSER(uint);
+ PCRE_DECLARE_INTEGER_PARSER(long);
+ PCRE_DECLARE_INTEGER_PARSER(ulong);
+ PCRE_DECLARE_INTEGER_PARSER(longlong);
+ PCRE_DECLARE_INTEGER_PARSER(ulonglong);
+
+#undef PCRE_DECLARE_INTEGER_PARSER
+};
+
+// Default-constructed Arg: no destination, parse_null as its parser.
+inline Arg::Arg() : arg_(nullptr), parser_(&Arg::parse_null) { }
+// Untyped destination: the pointer is stored and parse_null is the parser.
+inline Arg::Arg(void* p) : arg_(p), parser_(&Arg::parse_null) { }
+
+// Runs the stored parser over the n bytes at str, handing it arg_.
+inline bool Arg::Parse(const char* str, int n) const {
+  return parser_(str, n, arg_);
+}
+
+// This part of the parser, appropriate only for ints, deals with bases: for each integer type it defines Hex(), Octal() and CRadix() wrappers returning an Arg bound to the matching _hex/_octal/_cradix parser
+#define MAKE_INTEGER_PARSER(type, name) \
+ inline Arg Hex(type* ptr) { \
+ return Arg(ptr, Arg::parse_ ## name ## _hex); } \
+ inline Arg Octal(type* ptr) { \
+ return Arg(ptr, Arg::parse_ ## name ## _octal); } \
+ inline Arg CRadix(type* ptr) { \
+ return Arg(ptr, Arg::parse_ ## name ## _cradix); }
+
+MAKE_INTEGER_PARSER(short, short) /* */
+MAKE_INTEGER_PARSER(unsigned short, ushort) /* */
+MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */
+MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */
+MAKE_INTEGER_PARSER(long, long) /* because they can cause */
+MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */
+#if 1 /* the checking level is */
+MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */
+#endif /* */
+#if 1 /* */
+MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */
+#endif
+
+#undef PCRE_IS_SET  // NOTE(review): pcrecpp.h defines this macro only after it includes this header, so this #undef looks like a no-op there - confirm
+#undef PCRE_SET_OR_CLEAR  // NOTE(review): same ordering concern as PCRE_IS_SET - confirm
+#undef MAKE_INTEGER_PARSER
+
+// Wraps an arbitrary argument in an Arg by invoking the matching Arg
+// constructor.
+template<typename ARG>
+inline Arg wrap_arg(ARG && any) {
+  Arg wrapped(any);
+  return wrapped;
+}
+
+// An argument that already is a const Arg is handed back unchanged.
+inline Arg const & wrap_arg(Arg const & arg) { return arg; }
+
+// Compile-time helper over a pack of argument types: count() yields the
+// pack size and arrayify() stores one Arg per argument into an array.
+template<typename ... ARGS>
+struct Args;
+
+// Recursive case: handle the first argument, delegate the rest.
+template<typename HEAD, typename ... TAIL>
+struct Args<HEAD, TAIL...> {
+  typedef Args<TAIL...> next;
+  constexpr static unsigned count() {
+    return next::count() + 1;
+  }
+  template<typename _HEAD, typename ... _TAIL>
+  inline static void arrayify(Arg * ptr, _HEAD && head, _TAIL && ... tail) {
+    *ptr = wrap_arg(head);
+    next::arrayify(ptr + 1, tail...);
+  }
+};
+
+// Base case: an empty pack has no arguments and nothing to store.
+template<>
+struct Args<> {
+  constexpr static unsigned count() { return 0; }
+  inline static void arrayify(Arg *) { }
+};
+
+} // namespace pcrecpp
+
+
+#endif /* _PCRECPPARG_H */
diff --git a/pcrecpp/pcre_scanner.cc b/pcrecpp/pcre_scanner.cc
new file mode 100644
index 00000000..2887d6b2
--- /dev/null
+++ b/pcrecpp/pcre_scanner.cc
@@ -0,0 +1,186 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+
+#include <vector>
+#include <assert.h>
+
+#include "pcrecpp_internal.h"
+#include "pcre_scanner.h"
+
+using std::vector;
+
+namespace pcrecpp {
+
+// Builds a scanner over empty data: skipping and comment saving are off,
+// and neither a skip expression nor a comment list exists yet.
+Scanner::Scanner()
+  : data_(), input_(data_),
+    skip_(nullptr), should_skip_(false), skip_repeat_(false),
+    save_comments_(false), comments_(nullptr), comments_offset_(0) {
+}
+
+// Builds a scanner over a private copy of "in"; skipping and comment
+// saving start out disabled.
+Scanner::Scanner(const string& in)
+  : data_(in), input_(data_),
+    skip_(nullptr), should_skip_(false), skip_repeat_(false),
+    save_comments_(false), comments_(nullptr), comments_offset_(0) {
+}
+
+// Releases the saved-comment list and the skip expression; either may be
+// NULL, in which case delete does nothing.
+Scanner::~Scanner() {
+  delete comments_;
+  delete skip_;
+}
+
+// Installs "re" as the skip expression in repeating mode (skip_repeat_ is
+// set) and immediately consumes a skip at the current position. Passing
+// NULL removes the skip expression and turns skipping off.
+void Scanner::SetSkipExpression(const char* re) {
+  // Build the replacement before releasing the old expression: if the
+  // construction throws, skip_ still points at a live object instead of
+  // dangling (the destructor deletes skip_).
+  RE* const replacement = (re != NULL) ? new RE(re) : NULL;
+  delete skip_;
+  skip_ = replacement;
+  should_skip_ = (replacement != NULL);
+  skip_repeat_ = (replacement != NULL);
+  if (replacement != NULL) {
+    ConsumeSkip();
+  }
+}
+
+// Installs "re" as the skip expression in single-shot mode (skip_repeat_
+// is cleared) and immediately consumes a skip at the current position.
+// Passing NULL removes the skip expression and turns skipping off.
+void Scanner::Skip(const char* re) {
+  // Build the replacement before releasing the old expression: if the
+  // construction throws, skip_ still points at a live object instead of
+  // dangling (the destructor deletes skip_).
+  RE* const replacement = (re != NULL) ? new RE(re) : NULL;
+  delete skip_;
+  skip_ = replacement;
+  should_skip_ = (replacement != NULL);
+  skip_repeat_ = false;
+  if (replacement != NULL) {
+    ConsumeSkip();
+  }
+}
+
+void Scanner::DisableSkip() {  // turns skipping off; the skip expression itself is kept
+ assert(skip_ != NULL);  // a skip expression must have been installed first
+ should_skip_ = false;
+}
+
+void Scanner::EnableSkip() {  // turns skipping back on and applies it right away
+ assert(skip_ != NULL);  // ConsumeSkip() below dereferences skip_
+ should_skip_ = true;
+ ConsumeSkip();
+}
+
+// Returns the 1-based line number of the current scan position, i.e. one
+// plus the number of '\n' bytes between the start of data_ and input_.
+// TODO: Make it more efficient by keeping track of the last point
+// where we computed line numbers and counting newlines since then.
+// We could use std::count, but not all systems have it. :-(
+int Scanner::LineNumber() const {
+  const char* const stop = input_.data();
+  int line = 1;
+  const char* cursor = data_.data();
+  while (cursor < stop) {
+    if (*cursor == '\n') {
+      line++;
+    }
+    cursor++;
+  }
+  return line;
+}
+
+// Returns the distance in bytes from the start of the scanned data to the
+// current scan position.
+int Scanner::Offset() const {
+  const char* const base = data_.c_str();
+  const char* const here = input_.data();
+  return static_cast<int>(here - base);
+}
+
+// Reports whether "re" matches at the current position (anchored at the
+// start). The scan position is not advanced.
+bool Scanner::LookingAt(const RE& re) const {
+  int ignored_length;
+  const bool matched =
+      re.DoMatch(input_, RE::ANCHOR_START, &ignored_length, 0, 0);
+  return matched;
+}
+
+
+// Helper function to consume *skip_ at the current position and honour
+// save_comments_: when comment saving is on, the text skipped by this call
+// is appended to comments_ as a single StringPiece.
+void Scanner::ConsumeSkip() {
+  const char* const start_data = input_.data();
+  for (;;) {
+    const char* const before = input_.data();
+    if (!skip_->Consume(&input_)) {
+      break;
+    }
+    // Only one skip is allowed unless repetition was requested. Also stop
+    // when a match consumed nothing: an empty match would succeed again at
+    // the same position and the loop would never terminate.
+    if (!skip_repeat_ || input_.data() == before) {
+      break;
+    }
+  }
+  if (save_comments_) {
+    if (comments_ == NULL) {
+      comments_ = new vector<StringPiece>;
+    }
+    // already pointing one past end, so no need to +1
+    const int length = static_cast<int>(input_.data() - start_data);
+    if (length > 0) {
+      comments_->push_back(StringPiece(start_data, length));
+    }
+  }
+}
+
+
+// Appends to "ranges" every saved comment that lies entirely inside the
+// byte range [start, end) of the scanned data. Does nothing when
+// comments_ has not been initialized (e.g., when save_comments is false).
+void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
+  if (comments_ == NULL) {
+    return;
+  }
+  // TODO: if we guarantee that comments_ will contain StringPieces
+  // that are ordered by their start, then we can do a binary search
+  // for the first StringPiece at or past start and then scan for the
+  // ones contained in the range, quit early (use equal_range or
+  // lower_bound)
+  vector<StringPiece>::const_iterator cur = comments_->begin();
+  while (cur != comments_->end()) {
+    const char* const first = cur->data();
+    const bool starts_inside = !(first < data_.c_str() + start);
+    if (starts_inside && !(first + cur->size() > data_.c_str() + end)) {
+      ranges->push_back(*cur);
+    }
+    ++cur;
+  }
+}
+
+
+// Appends to "ranges" every saved comment not yet returned by an earlier
+// call, advancing comments_offset_ past each one. Does nothing when
+// comments_ has not been initialized (e.g., when save_comments is false).
+void Scanner::GetNextComments(vector<StringPiece> *ranges) {
+  if (comments_ == NULL) {
+    return;
+  }
+  vector<StringPiece>::const_iterator cur =
+      comments_->begin() + comments_offset_;
+  while (cur != comments_->end()) {
+    ranges->push_back(*cur);
+    ++comments_offset_;
+    ++cur;
+  }
+}
+
+} // namespace pcrecpp
diff --git a/pcrecpp/pcre_scanner_unittest.cc b/pcrecpp/pcre_scanner_unittest.cc
new file mode 100644
index 00000000..c00312c4
--- /dev/null
+++ b/pcrecpp/pcre_scanner_unittest.cc
@@ -0,0 +1,161 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Greg J. Badros
+//
+// Unittest for scanner, especially GetNextComments and GetComments()
+// functionality.
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h> /* for strchr */
+#include <string>
+#include <vector>
+
+#include "pcrecpp.h"
+#include "pcre_stringpiece.h"
+#include "pcre_scanner.h"
+
+#define FLAGS_unittest_stack_size 49152  // NOTE(review): not referenced by the tests visible in this file - confirm before removing
+
+// Dies with a fatal error if the two values are not equal: prints file, line and both expression texts to stderr, then calls exit(1).
+#define CHECK_EQ(a, b) do { \
+ if ( (a) != (b) ) { \
+ fprintf(stderr, "%s:%d: Check failed because %s != %s\n", \
+ __FILE__, __LINE__, #a, #b); \
+ exit(1); \
+ } \
+} while (0)
+
+using std::vector;
+using pcrecpp::StringPiece;
+using pcrecpp::Scanner;
+
+static void TestScanner() {  // scans three "name = number;" lines and checks the comments saved along the way
+ const char input[] = "\n"
+ "alpha = 1; // this sets alpha\n"
+ "bravo = 2; // bravo is set here\n"
+ "gamma = 33; /* and here is gamma */\n";
+
+ const char *re = "(\\w+) = (\\d+);";
+
+ Scanner s(input);
+ string var;
+ int number;
+ s.SkipCXXComments();
+ s.set_save_comments(true);
+ vector<StringPiece> comments;  // reused as the output buffer for every query below
+
+ s.Consume(re, &var, &number);
+ CHECK_EQ(var, "alpha");
+ CHECK_EQ(number, 1);
+ CHECK_EQ(s.LineNumber(), 3);  // expected position: past the blank first line and the whole alpha line
+ s.GetNextComments(&comments);
+ CHECK_EQ(comments.size(), 1);
+ CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
+ comments.resize(0);
+
+ s.Consume(re, &var, &number);
+ CHECK_EQ(var, "bravo");
+ CHECK_EQ(number, 2);
+ s.GetNextComments(&comments);  // expected to return only the comment saved since the previous call
+ CHECK_EQ(comments.size(), 1);
+ CHECK_EQ(comments[0].as_string(), " // bravo is set here\n");
+ comments.resize(0);
+
+ s.Consume(re, &var, &number);
+ CHECK_EQ(var, "gamma");
+ CHECK_EQ(number, 33);
+ s.GetNextComments(&comments);
+ CHECK_EQ(comments.size(), 1);
+ CHECK_EQ(comments[0].as_string(), " /* and here is gamma */\n");
+ comments.resize(0);
+
+ s.GetComments(0, sizeof(input), &comments);  // whole input: all three comments expected
+ CHECK_EQ(comments.size(), 3);
+ CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
+ CHECK_EQ(comments[1].as_string(), " // bravo is set here\n");
+ CHECK_EQ(comments[2].as_string(), " /* and here is gamma */\n");
+ comments.resize(0);
+
+ s.GetComments(0, (int)(strchr(input, '/') - input), &comments);  // range ends at the first '/': no comment fits
+ CHECK_EQ(comments.size(), 0);
+ comments.resize(0);
+
+ s.GetComments((int)(strchr(input, '/') - input - 1), sizeof(input),  // range starts at the space before the first '/'
+ &comments);
+ CHECK_EQ(comments.size(), 3);
+ CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
+ CHECK_EQ(comments[1].as_string(), " // bravo is set here\n");
+ CHECK_EQ(comments[2].as_string(), " /* and here is gamma */\n");
+ comments.resize(0);
+
+ s.GetComments((int)(strchr(input, '/') - input - 1),  // range covering just the first comment
+ (int)(strchr(input + 1, '\n') - input + 1), &comments);
+ CHECK_EQ(comments.size(), 1);
+ CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
+ comments.resize(0);
+}
+
+// Builds an input made of 1024 "# Comment N" lines followed by one
+// assignment, installs a repeating skip expression for whitespace and
+// '#' comments, and checks that the assignment is still parsed.
+static void TestBigComment() {
+  string input;
+  char buf[1024];  // definitely big enough
+  for (int i = 0; i < 1024; ++i) {
+    // snprintf bounds the write to the buffer, unlike sprintf.
+    snprintf(buf, sizeof(buf), " # Comment %d\n", i);
+    input += buf;
+  }
+  input += "name = value;\n";
+
+  Scanner s(input.c_str());
+  s.SetSkipExpression("\\s+|#.*\n");
+
+  string name;
+  string value;
+  s.Consume("(\\w+) = (\\w+);", &name, &value);
+  CHECK_EQ(name, "name");
+  CHECK_EQ(value, "value");
+}
+
+// TODO: also test scanner and big-comment in a thread with a
+// small stack size
+
+// Runs both scanner tests. A failed CHECK_EQ exits the process, so
+// reaching the end means every check passed and "OK" is printed.
+int main(int argc, char** argv) {
+  (void)argv;
+  (void)argc;
+
+  TestScanner();
+  TestBigComment();
+
+  // Done
+  fputs("OK\n", stdout);
+  return 0;
+}
diff --git a/pcrecpp/pcre_stringpiece.cc b/pcrecpp/pcre_stringpiece.cc
new file mode 100644
index 00000000..599e466c
--- /dev/null
+++ b/pcrecpp/pcre_stringpiece.cc
@@ -0,0 +1,39 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wilsonh@google.com (Wilson Hsieh)
+//
+
+#include <iostream>
+#include "pcrecpp_internal.h"
+#include "pcre_stringpiece.h"
+
+std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece) { // stream insertion for StringPiece
+ return (o << piece.as_string()); // writes piece.as_string() and returns the same stream for chaining
+}
diff --git a/pcrecpp/pcre_stringpiece_unittest.cc b/pcrecpp/pcre_stringpiece_unittest.cc
new file mode 100644
index 00000000..1c4759da
--- /dev/null
+++ b/pcrecpp/pcre_stringpiece_unittest.cc
@@ -0,0 +1,152 @@
+// Copyright 2003 and onwards Google Inc.
+// Author: Sanjay Ghemawat
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <map>
+#include <algorithm> // for make_pair
+
+#include "pcrecpp.h"
+#include "pcre_stringpiece.h"
+
+// CHECK dies with a fatal error if condition is not true. It is *not*
+// controlled by NDEBUG, so the check will be executed regardless of
+// compilation mode. Therefore, it is safe to do things like:
+// CHECK(fp->Write(x) == 4)
+#define CHECK(condition) do { \
+ if (!(condition)) { \
+ fprintf(stderr, "%s:%d: Check failed: %s\n", \
+ __FILE__, __LINE__, #condition); \
+ exit(1); \
+ } \
+} while (0)
+
+using pcrecpp::StringPiece;
+
+static void CheckSTLComparator() { // exercises StringPiece as the key type of a std::map
+ string s1("foo");
+ string s2("bar");
+ string s3("baz");
+
+ StringPiece p1(s1);
+ StringPiece p2(s2);
+ StringPiece p3(s3);
+
+ typedef std::map<StringPiece, int> TestMap;
+ TestMap map;
+
+ map.insert(std::make_pair(p1, 0)); // "foo" -> 0
+ map.insert(std::make_pair(p2, 1)); // "bar" -> 1
+ map.insert(std::make_pair(p3, 2)); // "baz" -> 2
+
+ CHECK(map.size() == 3);
+
+ TestMap::const_iterator iter = map.begin(); // expected iteration order: bar, baz, foo
+ CHECK(iter->second == 1);
+ ++iter;
+ CHECK(iter->second == 2);
+ ++iter;
+ CHECK(iter->second == 0);
+ ++iter;
+ CHECK(iter == map.end());
+
+ TestMap::iterator new_iter = map.find("zot"); // key that was never inserted
+ CHECK(new_iter == map.end());
+
+ new_iter = map.find("bar"); // lookup by string literal of an inserted key
+ CHECK(new_iter != map.end());
+
+ map.erase(new_iter); // drop "bar"; baz and foo must remain, in that order
+ CHECK(map.size() == 2);
+
+ iter = map.begin();
+ CHECK(iter->second == 2);
+ ++iter;
+ CHECK(iter->second == 0);
+ ++iter;
+ CHECK(iter == map.end());
+}
+
+static void CheckComparisonOperators() { // checks each relational operator against compare() on the same operands
+#define CMP_Y(op, x, y) \
+ CHECK( (StringPiece((x)) op StringPiece((y)))); \
+ CHECK( (StringPiece((x)).compare(StringPiece((y))) op 0))
+
+#define CMP_N(op, x, y) \
+ CHECK(!(StringPiece((x)) op StringPiece((y)))); \
+ CHECK(!(StringPiece((x)).compare(StringPiece((y))) op 0))
+
+ CMP_Y(==, "", ""); // CMP_Y: "x op y" must hold; CMP_N: it must not
+ CMP_Y(==, "a", "a");
+ CMP_Y(==, "aa", "aa");
+ CMP_N(==, "a", "");
+ CMP_N(==, "", "a");
+ CMP_N(==, "a", "b");
+ CMP_N(==, "a", "aa");
+ CMP_N(==, "aa", "a");
+
+ CMP_N(!=, "", ""); // same operand pairs as the == group, expectations inverted
+ CMP_N(!=, "a", "a");
+ CMP_N(!=, "aa", "aa");
+ CMP_Y(!=, "a", "");
+ CMP_Y(!=, "", "a");
+ CMP_Y(!=, "a", "b");
+ CMP_Y(!=, "a", "aa");
+ CMP_Y(!=, "aa", "a");
+
+ CMP_Y(<, "a", "b"); // strict less-than, including the prefix case "a" < "aa"
+ CMP_Y(<, "a", "aa");
+ CMP_Y(<, "aa", "b");
+ CMP_Y(<, "aa", "bb");
+ CMP_N(<, "a", "a");
+ CMP_N(<, "b", "a");
+ CMP_N(<, "aa", "a");
+ CMP_N(<, "b", "aa");
+ CMP_N(<, "bb", "aa");
+
+ CMP_Y(<=, "a", "a"); // less-or-equal: equal operands now pass
+ CMP_Y(<=, "a", "b");
+ CMP_Y(<=, "a", "aa");
+ CMP_Y(<=, "aa", "b");
+ CMP_Y(<=, "aa", "bb");
+ CMP_N(<=, "b", "a");
+ CMP_N(<=, "aa", "a");
+ CMP_N(<=, "b", "aa");
+ CMP_N(<=, "bb", "aa");
+
+ CMP_N(>=, "a", "b"); // greater-or-equal: mirror image of the < group
+ CMP_N(>=, "a", "aa");
+ CMP_N(>=, "aa", "b");
+ CMP_N(>=, "aa", "bb");
+ CMP_Y(>=, "a", "a");
+ CMP_Y(>=, "b", "a");
+ CMP_Y(>=, "aa", "a");
+ CMP_Y(>=, "b", "aa");
+ CMP_Y(>=, "bb", "aa");
+
+ CMP_N(>, "a", "a"); // strict greater-than: mirror image of the <= group
+ CMP_N(>, "a", "b");
+ CMP_N(>, "a", "aa");
+ CMP_N(>, "aa", "b");
+ CMP_N(>, "aa", "bb");
+ CMP_Y(>, "b", "a");
+ CMP_Y(>, "aa", "a");
+ CMP_Y(>, "b", "aa");
+ CMP_Y(>, "bb", "aa");
+
+#undef CMP_Y
+#undef CMP_N
+}
+
+int main(int argc, char** argv) { // test driver for the StringPiece checks
+ (void)argc; // unused; the cast silences unused-parameter warnings
+ (void)argv; // unused as well
+ CheckComparisonOperators();
+ CheckSTLComparator();
+
+ printf("OK\n"); // reached only if no CHECK called exit(1)
+ return 0;
+}
diff --git a/pcrecpp/pcrecpp.cc b/pcrecpp/pcrecpp.cc
new file mode 100644
index 00000000..2c37c44e
--- /dev/null
+++ b/pcrecpp/pcrecpp.cc
@@ -0,0 +1,727 @@
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
+#include <string.h> /* for memcpy */
+#include <assert.h>
+#include <errno.h>
+#include <string>
+#include <algorithm>
+
+#include "pcrecpp_internal.h"
+#include "pcre2.h"
+#include "pcrecpp.h"
+#include "pcre_stringpiece.h"
+
+
+namespace pcrecpp {
+
+// If the user doesn't ask for any options, we just use this one
+static RE_Options default_options;
+
+void RE::Init(const string& pat, const RE_Options* options) { // stores pattern/options and compiles both variants
+ pattern_ = pat;
+ if (options == NULL) {
+ options_ = default_options; // caller gave no options: fall back to the file-level default
+ } else {
+ options_ = *options; // copied, so the caller's object need not outlive this RE
+ }
+ error_ = "";
+ re_full_ = NULL;
+ re_partial_ = NULL;
+
+ re_partial_ = Compile(UNANCHORED);
+ if (re_partial_ != NULL) { // compile the ANCHOR_BOTH variant only if the plain pattern compiled
+ re_full_ = Compile(ANCHOR_BOTH);
+ }
+}
+
+void RE::Cleanup() { // frees both compiled patterns and clears the error text
+ if (re_full_ != NULL) pcre2_code_free(re_full_); // pointer is not reset to NULL here
+ if (re_partial_ != NULL) pcre2_code_free(re_partial_); // likewise left dangling; Init() overwrites both
+ error_ = "";
+}
+
+
+RE::~RE() { // destructor: releases whatever Cleanup() releases
+ Cleanup();
+}
+
+static void format_pcre_error(int error, string & str) {
+ PCRE2_UCHAR8 buffer[256];
+ auto rc = pcre2_get_error_message(error, buffer, 256);
+ str.assign(reinterpret_cast<string::value_type*>(buffer));
+ if (rc == PCRE2_ERROR_NOMEMORY) {
+ str.append("...");
+ }
+}
+
+pcre2_code* RE::Compile(Anchor anchor) { // compiles pattern_ for `anchor`; returns NULL and sets error_ on failure
+ // First, convert RE_Options into pcre options
+ int pcre_options = 0;
+ pcre_options = options_.all_options();
+ typedef std::unique_ptr<pcre2_compile_context,
+ decltype(pcre2_compile_context_free)*> compile_context_ptr;
+ compile_context_ptr compile_context(NULL, pcre2_compile_context_free);
+
+ // As of pcre2 the newline mode must be passed through the compile context.
+ // So we only need one if the newline mode is actually set.
+ if (options_.newline_mode()) {
+ compile_context = compile_context_ptr(pcre2_compile_context_create(NULL),
+ pcre2_compile_context_free);
+ if (!compile_context) {
+ error_ = "Unable to allocate memory for pcre2_compile_context";
+ return NULL;
+ }
+ if (pcre2_set_newline(compile_context.get(),
+ options_.newline_mode()) == PCRE2_ERROR_BADDATA) {
+ error_ = "REOptions: bad newline mode given";
+ return NULL;
+ }
+ }
+
+ // Special treatment for anchoring. This is needed because at
+ // runtime pcre only provides an option for anchoring at the
+ // beginning of a string (unless you use offset).
+ //
+ // There are three types of anchoring we want:
+ // UNANCHORED Compile the original pattern, and use
+ // a pcre unanchored match.
+ // ANCHOR_START Compile the original pattern, and use
+ // a pcre anchored match.
+ // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
+ // and use a pcre anchored match.
+
+ int compile_error;
+ PCRE2_SIZE eoffset; // error offset reported by pcre2_compile; not used afterwards
+ pcre2_code* re;
+ if (anchor != ANCHOR_BOTH) {
+ re = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern_.c_str()),
+ pattern_.length(), pcre_options, &compile_error,
+ &eoffset, compile_context.get());
+ } else {
+ // Tack a '\z' at the end of RE. Parenthesize it first so that
+ // the '\z' applies to all top-level alternatives in the regexp.
+ string wrapped = "(?:"; // A non-counting grouping operator
+ wrapped += pattern_;
+ wrapped += ")\\z";
+ re = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(wrapped.c_str()),
+ wrapped.length(), pcre_options, &compile_error, &eoffset,
+ compile_context.get());
+ }
+ if (re == NULL) { // compilation failed: keep the PCRE2 message for the caller
+ format_pcre_error(compile_error, error_);
+ }
+ return re;
+}
+
+/***** Matching interfaces *****/
+
+bool RE::Replace(const StringPiece& rewrite, // replaces one match in *str; false if no match or Rewrite() fails
+ string *str) const {
+ pcre2_match_data_ptr match_data;
+ int matches = TryMatch(*str, 0, UNANCHORED, true, match_data); // search from offset 0, empty matches allowed
+ if (matches == 0)
+ return false;
+
+ string s; // expanded replacement text
+ if (!Rewrite(&s, rewrite, *str, match_data))
+ return false;
+
+ auto vec = pcre2_get_ovector_pointer(match_data.get());
+
+ assert(vec[0] >= 0);
+ assert(vec[1] >= 0);
+ str->replace(vec[0], vec[1] - vec[0], s); // vec[0]..vec[1] is the span of the whole match
+ return true;
+}
+
+static bool is_multi_char_newline_mode(int value) { // true for the newline modes listed under "return true" below
+ switch (value) {
+ case PCRE2_NEWLINE_CR:
+ case PCRE2_NEWLINE_LF:
+ return false; // single-character newline conventions
+ case PCRE2_NEWLINE_CRLF:
+ case PCRE2_NEWLINE_ANY:
+ case PCRE2_NEWLINE_ANYCRLF:
+ return true; // GlobalReplace uses this to step over "\r\n" as one unit
+ default:
+ return false; // any other value (including "not set") counts as single-character
+ }
+}
+
+int RE::GlobalReplace(const StringPiece& rewrite, // replaces every match in *str; returns the replacement count
+ string *str) const {
+ int count = 0;
+ string out; // result is built here and swapped into *str at the end
+ int start = 0; // offset in *str where the next match attempt begins
+ bool last_match_was_empty_string = false;
+ pcre2_match_data_ptr match_data;
+
+ while (start <= static_cast<int>(str->length())) { // <= so an empty match at the very end is still tried
+ // If the previous match was for the empty string, we shouldn't
+ // just match again: we'll match in the same way and get an
+ // infinite loop. Instead, we do the match in a special way:
+ // anchored -- to force another try at the same position --
+ // and with a flag saying that this time, ignore empty matches.
+ // If this special match returns, that means there's a non-empty
+ // match at this position as well, and we can continue. If not,
+ // we do what perl does, and just advance by one.
+ // Notice that perl prints '@@@' for this;
+ // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
+ int matches;
+ if (last_match_was_empty_string) {
+ matches = TryMatch(*str, start, ANCHOR_START, false, match_data);
+ if (matches <= 0) {
+ int matchend = start + 1; // advance one character.
+ // If the current char is CR and we're in CRLF mode, skip LF too.
+ // Note it's better to call pcre2_pattern_info() than to examine
+ // all_options(), since options_ could have changed between
+ // compile-time and now, but this is simpler and safe enough.
+ // Modified by PH to add ANY and ANYCRLF.
+ if (matchend < static_cast<int>(str->length()) &&
+ (*str)[start] == '\r' && (*str)[matchend] == '\n' &&
+ is_multi_char_newline_mode(options_.newline_mode())) {
+ matchend++;
+ }
+ // We also need to advance more than one char if we're in utf8 mode. NOTE(review): compiled only when SUPPORT_UTF8 is defined -- confirm the build sets it.
+#ifdef SUPPORT_UTF8
+ if (options_.utf8()) {
+ while (matchend < static_cast<int>(str->length()) &&
+ ((*str)[matchend] & 0xc0) == 0x80) // 10xxxxxx: UTF-8 continuation byte
+ matchend++;
+ }
+#endif
+ if (start < static_cast<int>(str->length()))
+ out.append(*str, start, matchend - start); // copy the skipped character(s) unchanged
+ start = matchend;
+ last_match_was_empty_string = false;
+ continue;
+ }
+ } else {
+ matches = TryMatch(*str, start, UNANCHORED, true, match_data);
+ if (matches <= 0)
+ break; // no further match anywhere in the remainder
+ }
+ auto vec = pcre2_get_ovector_pointer(match_data.get());
+ int matchstart = vec[0], matchend = vec[1];
+ assert(matchstart >= start);
+ assert(matchend >= matchstart);
+ out.append(*str, start, matchstart - start); // text between the previous match and this one
+ Rewrite(&out, rewrite, *str, match_data); // result ignored: a bad rewrite still counts as a replacement
+ start = matchend;
+ count++;
+ last_match_was_empty_string = (matchstart == matchend);
+ }
+
+ if (count == 0)
+ return 0; // nothing matched: *str is left untouched
+
+ if (start < static_cast<int>(str->length()))
+ out.append(*str, start, str->length() - start); // tail after the last match
+ swap(out, *str);
+ return count;
+}
+
+bool RE::Extract(const StringPiece& rewrite, // writes the expanded rewrite for one match of `text` into *out
+ const StringPiece& text,
+ string *out) const {
+ pcre2_match_data_ptr match_data;
+ int matches = TryMatch(text, 0, UNANCHORED, true, match_data);
+ if (matches == 0)
+ return false; // no match: *out is left as it was
+ out->erase(); // unlike Replace(), the result holds only the rewrite text
+ return Rewrite(out, rewrite, text, match_data);
+}
+
+/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) { // returns `unquoted` with regex-special bytes escaped
+ string result;
+
+ // Escape any ascii character not in [A-Za-z_0-9].
+ //
+ // Note that it's legal to escape a character even if it has no
+ // special meaning in a regular expression -- so this function does
+ // that. (This also makes it identical to the perl function of the
+ // same name; see `perldoc -f quotemeta`.) The one exception is
+ // escaping NUL: rather than doing backslash + NUL, like perl does,
+ // we do '\0', because pcre itself doesn't take embedded NUL chars.
+ for (int ii = 0; ii < unquoted.size(); ++ii) {
+ // Note that using 'isalnum' here raises the benchmark time from
+ // 32ns to 58ns:
+ if (unquoted[ii] == '\0') {
+ result += "\\0"; // two characters: backslash and the digit zero
+ } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+ unquoted[ii] != '_' &&
+ // If this is the part of a UTF8 or Latin1 character, we need
+ // to copy this byte without escaping. Experimentally this is
+ // what works correctly with the regexp library.
+ !(unquoted[ii] & 128)) {
+ result += '\\';
+ result += unquoted[ii];
+ } else {
+ result += unquoted[ii]; // word characters and high-bit bytes are copied as-is
+ }
+ }
+
+ return result;
+}
+
+/***** Actual matching and rewriting code *****/
+int RE::TryMatch(const StringPiece& text, // runs one pcre2_match; returns its positive result, or 0 on any failure
+ int startpos, // offset in text where matching starts
+ Anchor anchor, // ANCHOR_BOTH selects re_full_, everything else re_partial_
+ bool empty_ok, // false adds PCRE2_NOTEMPTY
+ pcre2_match_data_ptr & match_data) const { // out: replaced with freshly created match data
+ typedef std::unique_ptr<pcre2_match_context,
+ decltype(pcre2_match_context_free)*> match_context_ptr;
+
+ pcre2_code* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
+ if (re == NULL) {
+ //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
+ return 0;
+ }
+ match_context_ptr match_context = match_context_ptr(
+ pcre2_match_context_create(NULL),
+ pcre2_match_context_free);
+ if (!match_context)
+ return 0; // allocation failure is reported the same way as "no match"
+
+ if (options_.match_limit() > 0) {
+ pcre2_set_match_limit(match_context.get(), options_.match_limit());
+ }
+ if (options_.match_limit_recursion() > 0) { // NOTE(review): PCRE2 docs call pcre2_set_recursion_limit obsolete -- confirm for this version
+ pcre2_set_recursion_limit(match_context.get(),
+ options_.match_limit_recursion());
+ }
+
+ match_data = pcre2_match_data_ptr(
+ pcre2_match_data_create_from_pattern(re, NULL),
+ pcre2_match_data_free);
+ if (!match_data) {
+ return 0;
+ }
+
+ // int options = 0;
+ // Changed by PH as a result of bugzilla #1288
+ int options = (options_.all_options() & PCRE2_NO_UTF_CHECK); // only this one compile option is carried over
+
+ if (anchor != UNANCHORED)
+ options |= PCRE2_ANCHORED;
+ if (!empty_ok)
+ options |= PCRE2_NOTEMPTY;
+
+ int rc = pcre2_match(
+ re, reinterpret_cast<PCRE2_SPTR>((text.empty()) ? "" : text.data()), // never hand PCRE2 the data() of an empty piece
+ text.size(), startpos, options, match_data.get(), match_context.get());
+
+ // Handle errors: every negative result is folded into 0, so callers cannot tell "no match" from an error
+ if (rc == PCRE2_ERROR_NOMATCH) {
+ return 0;
+ }
+ if (rc == PCRE2_ERROR_PARTIAL) {
+ // not sure what to do with partial yet
+ return 0;
+ } else if (rc < 0) {
+ // For any other error condition also return 0.
+ return 0;
+ }
+
+ return rc; // positive value returned by pcre2_match on success
+}
+
+bool RE::DoMatchImpl(const StringPiece& text, // matches text and parses capture groups 1..n into args[0..n-1]
+ Anchor anchor,
+ int* consumed, // out, may be NULL: end offset of the whole match
+ const Arg* args,
+ int n) const {
+ pcre2_match_data_ptr match_data;
+ int matches = TryMatch(text, 0, anchor, true, match_data);
+ assert(matches >= 0); // TryMatch never returns negatives
+ if (matches == 0)
+ return false;
+
+ auto vec = pcre2_get_ovector_pointer(match_data.get());
+
+ // allow for NULL
+ if (consumed != NULL)
+ *consumed = vec[1];
+
+ if (n == 0 || args == NULL) {
+ // We are not interested in results
+ return true;
+ }
+
+ if (NumberOfCapturingGroups() < n) {
+ // RE has fewer capturing groups than number of arg pointers passed in
+ return false;
+ }
+
+ // If we got here, we must have matched the whole pattern.
+ // We do not need (can not do) any more checks on the value of 'matches' here
+ // -- see the comment for TryMatch.
+ for (int i = 0; i < n; i++) {
+ const int start = vec[2*(i+1)]; // group i+1: pair 0 is the whole match
+ const int limit = vec[2*(i+1)+1];
+ if (!args[i].Parse(text.data() + start, limit - start)) { // NOTE(review): an unset group presumably gives start == limit == -1 -- confirm
+ // TODO: Should we indicate what the error was?
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool RE::DoMatch(const StringPiece& text, // thin wrapper: asserts n >= 0, then forwards to DoMatchImpl
+ Anchor anchor,
+ int* consumed,
+ Arg const args[],
+ int n) const {
+ assert(n >= 0); // checked only in builds with assertions enabled
+ bool retval = DoMatchImpl(text, anchor, consumed, args, n);
+ return retval;
+}
+
+bool RE::Rewrite(string *out, const StringPiece &rewrite, // appends rewrite to *out, expanding \0..\9 and "\\"
+ const StringPiece &text, // subject the offsets in match_data refer to
+ pcre2_match_data_ptr const & match_data) const { // returns false on a malformed rewrite string
+ auto veclen = pcre2_get_ovector_count(match_data.get());
+ auto vec = pcre2_get_ovector_pointer(match_data.get());
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
+ s < end; s++) {
+ int c = *s;
+ if (c == '\\') {
+ c = (++s < end) ? *s : '\0'; // a trailing backslash must not read past the rewrite text; NUL is rejected below
+ if (isdigit(static_cast<unsigned char>(c))) { // cast: <ctype.h> functions are undefined for negative char values
+ decltype(veclen) n = (c - '0');
+ if (n >= veclen) {
+ //fprintf(stderr, "requested group %d in regexp %.*s\n",
+ // n, rewrite.size(), rewrite.data());
+ return false;
+ }
+ const PCRE2_SIZE start = vec[2 * n];
+ if (start != PCRE2_UNSET) // a group that did not take part in the match expands to nothing
+ out->append(text.data() + start, vec[2 * n + 1] - start);
+ } else if (c == '\\') {
+ *out += '\\';
+ } else {
+ //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
+ // rewrite.size(), rewrite.data());
+ return false;
+ }
+ } else {
+ *out += c;
+ }
+ }
+ return true;
+}
+
+// Return the number of capturing subpatterns, or -1 if the regexp
+// wasn't valid on construction or the count could not be queried.
+int RE::NumberOfCapturingGroups() const {
+ if (re_partial_ == NULL) return -1;
+
+ uint32_t result = 0; // PCRE2_INFO_CAPTURECOUNT stores its answer in a uint32_t
+ int pcre_retval = pcre2_pattern_info(re_partial_, PCRE2_INFO_CAPTURECOUNT,
+ &result);
+ assert(pcre_retval == 0);
+ return (pcre_retval == 0) ? static_cast<int>(result) : -1; // still checked when NDEBUG removes the assert
+}
+
+/***** Parsers for various types *****/
+
+bool Arg::parse_null(const char* str, int n, void* dest) { // ignores the text; succeeds only when dest is NULL
+ (void)str;
+ (void)n;
+ // We fail if somebody asked us to store into a non-NULL void* pointer
+ return (dest == NULL);
+}
+
+bool Arg::parse_string(const char* str, int n, void* dest) { // copies the n bytes at str into *dest (a string); never fails
+ if (dest == NULL) return true; // NULL destination: nothing to store
+ reinterpret_cast<string*>(dest)->assign(str, n);
+ return true;
+}
+
+bool Arg::parse_stringpiece(const char* str, int n, void* dest) { // points *dest (a StringPiece) at str/n; never fails
+ if (dest == NULL) return true; // NULL destination: nothing to store
+ reinterpret_cast<StringPiece*>(dest)->set(str, n); // NOTE(review): presumably stores the pointer without copying -- confirm in StringPiece::set
+ return true;
+}
+
+bool Arg::parse_char(const char* str, int n, void* dest) { // accepts exactly one byte and stores it as char
+ if (n != 1) return false; // empty or multi-byte text is rejected even when dest is NULL
+ if (dest == NULL) return true;
+ *(reinterpret_cast<char*>(dest)) = str[0];
+ return true;
+}
+
+bool Arg::parse_uchar(const char* str, int n, void* dest) { // accepts exactly one byte and stores it as unsigned char
+ if (n != 1) return false; // empty or multi-byte text is rejected even when dest is NULL
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned char*>(dest)) = str[0];
+ return true;
+}
+
+// Largest number spec that we are willing to parse
+static const int kMaxNumberLength = 32;
+
+// REQUIRES "buf" must have length at least kMaxNumberLength+1
+// REQUIRES "n > 0"
+// Copies "str" into "buf" and null-terminates if necessary.
+// Returns one of:
+// a. "str" if no termination is needed
+// b. "buf" if the string was copied and null-terminated
+// c. "" if the input was invalid and has no hope of being parsed
+static const char* TerminateNumber(char* buf, const char* str, int n) {
+ if ((n > 0) && isspace(static_cast<unsigned char>(*str))) { // cast: <ctype.h> is undefined for negative char values
+ // We are less forgiving than the strtoxxx() routines and do not
+ // allow leading spaces.
+ return "";
+ }
+
+ // See if the character right after the input text may potentially
+ // look like a digit.
+ if (isdigit(static_cast<unsigned char>(str[n])) || // str[n] is the first byte after the text, so it must be readable
+ ((str[n] >= 'a') && (str[n] <= 'f')) ||
+ ((str[n] >= 'A') && (str[n] <= 'F'))) {
+ if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
+ memcpy(buf, str, n);
+ buf[n] = '\0'; // stop strtoxxx() from running on into the following digits
+ return buf;
+ } else {
+ // We can parse right out of the supplied string, so return it.
+ return str;
+ }
+}
+
+bool Arg::parse_long_radix(const char* str, // parses exactly n bytes at str as a long in `radix`
+ int n,
+ void* dest, // long*, or NULL to validate only
+ int radix) {
+ if (n == 0) return false; // empty text is never a number
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n); // may redirect str to buf, or to "" for hopeless input
+ char* end;
+ errno = 0; // cleared so a stale value is not mistaken for a strtol failure
+ long r = strtol(str, &end, radix);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false; // e.g. out of range for long
+ if (dest == NULL) return true;
+ *(reinterpret_cast<long*>(dest)) = r;
+ return true;
+}
+
+bool Arg::parse_ulong_radix(const char* str, // parses exactly n bytes at str as an unsigned long in `radix`
+ int n,
+ void* dest, // unsigned long*, or NULL to validate only
+ int radix) {
+ if (n == 0) return false; // empty text is never a number
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n); // may redirect str to buf, or to "" for hopeless input
+ if (str[0] == '-') return false; // strtoul() on a negative number?!
+ char* end;
+ errno = 0; // cleared so a stale value is not mistaken for a strtoul failure
+ unsigned long r = strtoul(str, &end, radix);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false; // e.g. out of range for unsigned long
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned long*>(dest)) = r;
+ return true;
+}
+
+bool Arg::parse_short_radix(const char* str, // parses via parse_long_radix, then range-checks for short
+ int n,
+ void* dest, // short*, or NULL to validate only
+ int radix) {
+ long r;
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
+ if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
+ return true;
+}
+
+bool Arg::parse_ushort_radix(const char* str, // parses via parse_ulong_radix, then range-checks for unsigned short
+ int n,
+ void* dest, // unsigned short*, or NULL to validate only
+ int radix) {
+ unsigned long r;
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
+ if (r > USHRT_MAX) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
+ return true;
+}
+
+bool Arg::parse_int_radix(const char* str, // parses via parse_long_radix, then range-checks for int
+ int n,
+ void* dest, // int*, or NULL to validate only
+ int radix) {
+ long r;
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
+ if (r < INT_MIN || r > INT_MAX) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<int*>(dest)) = r; // narrowing is safe: r was range-checked above
+ return true;
+}
+
+bool Arg::parse_uint_radix(const char* str, // parses via parse_ulong_radix, then range-checks for unsigned int
+ int n,
+ void* dest, // unsigned int*, or NULL to validate only
+ int radix) {
+ unsigned long r;
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
+ if (r > UINT_MAX) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned int*>(dest)) = r; // narrowing is safe: r was range-checked above
+ return true;
+}
+
+bool Arg::parse_longlong_radix(const char* str, // parses exactly n bytes at str as a long long in `radix`
+ int n,
+ void* dest, // long long*, or NULL to validate only
+ int radix) {
+#ifndef HAVE_LONG_LONG
+ return false; // built without long long support: every input is rejected
+#else
+ if (n == 0) return false; // empty text is never a number
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n); // may redirect str to buf, or to "" for hopeless input
+ char* end;
+ errno = 0; // cleared so a stale value is not mistaken for a conversion failure
+#if defined HAVE_STRTOQ
+ long long r = strtoq(str, &end, radix);
+#elif defined HAVE_STRTOLL
+ long long r = strtoll(str, &end, radix);
+#elif defined HAVE__STRTOI64
+ long long r = _strtoi64(str, &end, radix);
+#elif defined HAVE_STRTOIMAX
+ long long r = strtoimax(str, &end, radix);
+#else
+#error parse_longlong_radix: cannot convert input to a long-long
+#endif
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false; // conversion function reported an error
+ if (dest == NULL) return true;
+ *(reinterpret_cast<long long*>(dest)) = r;
+ return true;
+#endif /* HAVE_LONG_LONG */
+}
+
+bool Arg::parse_ulonglong_radix(const char* str, // parses exactly n bytes at str as an unsigned long long in `radix`
+ int n,
+ void* dest, // unsigned long long*, or NULL to validate only
+ int radix) {
+#ifndef HAVE_UNSIGNED_LONG_LONG
+ return false; // built without unsigned long long support: every input is rejected
+#else
+ if (n == 0) return false; // empty text is never a number
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n); // may redirect str to buf, or to "" for hopeless input
+ if (str[0] == '-') return false; // strtoull() on a negative number?!
+ char* end;
+ errno = 0; // cleared so a stale value is not mistaken for a conversion failure
+#if defined HAVE_STRTOQ
+ unsigned long long r = strtouq(str, &end, radix);
+#elif defined HAVE_STRTOLL
+ unsigned long long r = strtoull(str, &end, radix);
+#elif defined HAVE__STRTOI64
+ unsigned long long r = _strtoui64(str, &end, radix);
+#elif defined HAVE_STRTOIMAX
+ unsigned long long r = strtoumax(str, &end, radix);
+#else
+#error parse_ulonglong_radix: cannot convert input to a long-long
+#endif
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false; // conversion function reported an error
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned long long*>(dest)) = r;
+ return true;
+#endif /* HAVE_UNSIGNED_LONG_LONG */
+}
+
+bool Arg::parse_double(const char* str, int n, void* dest) { // parses exactly n bytes at str as a double via strtod
+ if (n == 0) return false; // empty text is never a number
+ static const int kMaxLength = 200;
+ char buf[kMaxLength];
+ if (n >= kMaxLength) return false; // must leave room for the terminating NUL
+ memcpy(buf, str, n); // always copied, so strtod cannot read past the n bytes
+ buf[n] = '\0';
+ errno = 0; // cleared so a stale value is not mistaken for a strtod failure
+ char* end;
+ double r = strtod(buf, &end);
+ if (end != buf + n) return false; // Leftover junk
+ if (errno) return false; // strtod reported an error
+ if (dest == NULL) return true;
+ *(reinterpret_cast<double*>(dest)) = r;
+ return true;
+}
+
+bool Arg::parse_float(const char* str, int n, void* dest) { // parses as double, then narrows to float
+ double r;
+ if (!parse_double(str, n, &r)) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<float*>(dest)) = static_cast<float>(r); // no range check against float limits is made here
+ return true;
+}
+
+
+#define DEFINE_INTEGER_PARSERS(name) \
+ bool Arg::parse_##name(const char* str, int n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 10); \
+ } \
+ bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 16); \
+ } \
+ bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 8); \
+ } \
+ bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 0); \
+ }
+
+DEFINE_INTEGER_PARSERS(short) /* */
+DEFINE_INTEGER_PARSERS(ushort) /* */
+DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
+DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
+DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
+DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
+DEFINE_INTEGER_PARSERS(longlong) /* */
+DEFINE_INTEGER_PARSERS(ulonglong) /* */
+
+#undef DEFINE_INTEGER_PARSERS
+
+} // namespace pcrecpp
diff --git a/pcrecpp/pcrecpp_internal.h b/pcrecpp/pcrecpp_internal.h
new file mode 100644
index 00000000..827f9e04
--- /dev/null
+++ b/pcrecpp/pcrecpp_internal.h
@@ -0,0 +1,71 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/*
+Copyright (c) 2005, Google Inc.
+All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifndef PCRECPP_INTERNAL_H
+#define PCRECPP_INTERNAL_H
+
+/* When compiling a DLL for Windows, the exported symbols have to be declared
+using some MS magic. I found some useful information on this web page:
+http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
+information there, using __declspec(dllexport) without "extern" we have a
+definition; with "extern" we have a declaration. The settings here override the
+setting in pcre.h. We use:
+
+ PCRECPP_EXP_DECL for declarations
+ PCRECPP_EXP_DEFN for definitions of exported functions
+
+*/
+
+#ifndef PCRECPP_EXP_DECL
+# ifdef _WIN32
+# ifndef PCRE_STATIC
+# define PCRECPP_EXP_DECL extern __declspec(dllexport)
+# define PCRECPP_EXP_DEFN __declspec(dllexport)
+# else
+# define PCRECPP_EXP_DECL extern
+# define PCRECPP_EXP_DEFN
+# endif
+# else
+# define PCRECPP_EXP_DECL extern
+# define PCRECPP_EXP_DEFN
+# endif
+#endif
+
+#endif /* PCRECPP_INTERNAL_H */
+
+/* End of pcrecpp_internal.h */
diff --git a/pcrecpp/pcrecpp_unittest.cc b/pcrecpp/pcrecpp_unittest.cc
new file mode 100644
index 00000000..67f2398f
--- /dev/null
+++ b/pcrecpp/pcrecpp_unittest.cc
@@ -0,0 +1,1282 @@
+// -*- coding: utf-8 -*-
+//
+// Copyright (c) 2005 - 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+//
+// TODO: Test extractions for PartialMatch/Consume
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h> /* for memset and strcmp */
+#include <cassert>
+#include <vector>
+#include "pcrecpp.h"
+
+using pcrecpp::StringPiece;
+using pcrecpp::RE;
+using pcrecpp::RE_Options;
+using pcrecpp::Hex;
+using pcrecpp::Octal;
+using pcrecpp::CRadix;
+
+static bool VERBOSE_TEST = false;
+
+// CHECK prints "file:line: Check failed: <condition text>" to stderr and calls
+// exit(1) if condition is not true. It is *not* controlled by NDEBUG, so the
+// condition is evaluated regardless of compilation mode. Therefore, it is safe
+// to put expressions with side effects in it, e.g. CHECK_EQ(fp->Write(x), 4)
+#define CHECK(condition) do { \
+ if (!(condition)) { \
+ fprintf(stderr, "%s:%d: Check failed: %s\n", \
+ __FILE__, __LINE__, #condition); \
+ exit(1); \
+ } \
+} while (0)
+
+#define CHECK_EQ(a, b) CHECK(a == b)  // NOTE(review): a and b are pasted unparenthesized around ==; pass simple operands only
+
+// Timing benchmark: compiles "ruby:\d+" once, then runs num_iters full
+// matches of that pattern against one fixed StringPiece.
+static void Timing1(int num_iters) {
+  RE pattern("ruby:\\d+");
+  StringPiece p("ruby:1234");
+
+  int remaining = num_iters;
+  while (remaining > 0) {
+    CHECK(pattern.FullMatch(p));
+    --remaining;
+  }
+}
+
+// Timing benchmark: compiles "ruby:(\d+)" once, then runs num_iters full
+// matches that each extract the digits into an int and verify the value.
+static void Timing2(int num_iters) {
+  RE pattern("ruby:(\\d+)");
+  int i;  // receives the captured number on every iteration
+
+  int remaining = num_iters;
+  while (remaining > 0) {
+    CHECK(pattern.FullMatch("ruby:1234", &i));
+    CHECK_EQ(i, 1234);
+    --remaining;
+  }
+}
+
+// Timing benchmark: builds a text made of num_iters copies of
+// "this is another line\n", then counts how many times the ".*\n" pattern can
+// be consumed from the front of that text and prints the count.
+static void Timing3(int num_iters) {
+  string text_string;
+  for (int j = num_iters; j > 0; j--) {
+    text_string += "this is another line\n";
+  }
+
+  RE line_matcher(".*\n");
+  StringPiece text(text_string);  // the view handed to Consume() below
+  int counter = 0;
+  while (line_matcher.Consume(&text)) {
+    counter++;
+  }
+  printf("Matched %d lines\n", counter);
+}
+
+#if 0 // uncomment this if you have a way of defining VirtualProcessSize()
+
+// Leak check (compiled out unless the surrounding #if is enabled): constructs
+// 100000 RE objects with distinct patterns and requires that the process size
+// reported by VirtualProcessSize() grows by less than 2% between iteration
+// 50000 and the end of the loop. VirtualProcessSize() is not defined in this
+// file and must be supplied by whoever enables the test.
+static void LeakTest() {
+  // Check for memory leaks
+  unsigned long long initial_size = 0;
+  for (int i = 0; i < 100000; i++) {
+    if (i == 50000) {
+      // Baseline is sampled halfway through the loop.
+      initial_size = VirtualProcessSize();
+      printf("Size after 50000: %llu\n", initial_size);
+    }
+    char buf[100];  // definitely big enough
+    sprintf(buf, "pat%09d", i);
+    RE newre(buf);
+  }
+  // Same type as initial_size, so both agree with the %llu conversions.
+  const unsigned long long final_size = VirtualProcessSize();
+  printf("Size after 100000: %llu\n", final_size);
+  // Subtract as doubles: an unsigned subtraction would wrap around to a huge
+  // positive value (and fail the check) if the process shrank.
+  const double growth =
+      (double(final_size) - double(initial_size)) / double(final_size);
+  printf("Growth: %0.2f%%", growth * 100);
+  CHECK(growth < 0.02);  // Allow < 2% growth
+}
+
+#endif
+
+static void RadixTests() {  // Integer parsing through Hex(), Octal(), CRadix() and plain pointers, per integer type.
+ printf("Testing hex\n");
+ // CHECK_HEX(type, value): parse "value" via Hex() and "0x" + "value" via CRadix(); both must equal 0x##value.
+#define CHECK_HEX(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
+ CHECK_EQ(v, 0x ## value); \
+ CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
+ CHECK_EQ(v, 0x ## value); \
+ } while(0)
+
+ CHECK_HEX(short, 2bad);
+ CHECK_HEX(unsigned short, 2badU);
+ CHECK_HEX(int, dead);
+ CHECK_HEX(unsigned int, deadU);
+ CHECK_HEX(long, 7eadbeefL);
+ CHECK_HEX(unsigned long, deadbeefUL);
+#ifdef HAVE_LONG_LONG
+ CHECK_HEX(long long, 12345678deadbeefLL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
+#endif
+
+#undef CHECK_HEX
+
+ printf("Testing octal\n");
+ // CHECK_OCTAL(type, value): parse "value" via Octal() and "0" + "value" via CRadix(); both must equal 0##value.
+#define CHECK_OCTAL(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
+ CHECK_EQ(v, 0 ## value); \
+ CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
+ CHECK_EQ(v, 0 ## value); \
+ } while(0)
+
+ CHECK_OCTAL(short, 77777);
+ CHECK_OCTAL(unsigned short, 177777U);
+ CHECK_OCTAL(int, 17777777777);
+ CHECK_OCTAL(unsigned int, 37777777777U);
+ CHECK_OCTAL(long, 17777777777L);
+ CHECK_OCTAL(unsigned long, 37777777777UL);
+#ifdef HAVE_LONG_LONG
+ CHECK_OCTAL(long long, 777777777777777777777LL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
+#endif
+
+#undef CHECK_OCTAL
+
+ printf("Testing decimal\n");
+ // CHECK_DECIMAL(type, value): parse "value" through a plain pointer and via CRadix(); both must equal value.
+#define CHECK_DECIMAL(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
+ CHECK_EQ(v, value); \
+ CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
+ CHECK_EQ(v, value); \
+ } while(0)
+
+ CHECK_DECIMAL(short, -1);
+ CHECK_DECIMAL(unsigned short, 9999);
+ CHECK_DECIMAL(int, -1000);
+ CHECK_DECIMAL(unsigned int, 12345U);
+ CHECK_DECIMAL(long, -10000000L);
+ CHECK_DECIMAL(unsigned long, 3083324652U);
+#ifdef HAVE_LONG_LONG
+ CHECK_DECIMAL(long long, -100000000000000LL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
+#endif
+
+#undef CHECK_DECIMAL
+
+}
+
+// Exercises RE::Replace (result compared against `single`) and
+// RE::GlobalReplace (result compared against `global`, return value against
+// `global_count`) over a table of patterns. The table includes patterns that
+// can match the empty string and inputs using \n, \r and \r\n line endings.
+// Table patterns are compiled in CRLF newline mode; two extra cases at the
+// end repeat one input in CR and LF newline modes.
+//
+// Pattern compilation is verified with CHECK rather than assert so that the
+// verification is still performed when the file is built with NDEBUG.
+static void TestReplace() {
+  printf("Testing Replace\n");
+
+  struct ReplaceTest {
+    const char *regexp;    // pattern to compile
+    const char *rewrite;   // replacement text (\N inserts capture group N)
+    const char *original;  // input text; NULL terminates the table
+    const char *single;    // expected text after Replace
+    const char *global;    // expected text after GlobalReplace
+    int global_count;      // the expected return value from GlobalReplace
+  };
+  static const ReplaceTest tests[] = {
+    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
+      "\\2\\1ay",
+      "the quick brown fox jumps over the lazy dogs.",
+      "ethay quick brown fox jumps over the lazy dogs.",
+      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
+      9 },
+    { "\\w+",
+      "\\0-NOSPAM",
+      "paul.haahr@google.com",
+      "paul-NOSPAM.haahr@google.com",
+      "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
+      4 },
+    { "^",
+      "(START)",
+      "foo",
+      "(START)foo",
+      "(START)foo",
+      1 },
+    { "^",
+      "(START)",
+      "",
+      "(START)",
+      "(START)",
+      1 },
+    { "$",
+      "(END)",
+      "",
+      "(END)",
+      "(END)",
+      1 },
+    { "b",
+      "bb",
+      "ababababab",
+      "abbabababab",
+      "abbabbabbabbabb",
+      5 },
+    { "b",
+      "bb",
+      "bbbbbb",
+      "bbbbbbb",
+      "bbbbbbbbbbbb",
+      6 },
+    { "b+",
+      "bb",
+      "bbbbbb",
+      "bb",
+      "bb",
+      1 },
+    { "b*",
+      "bb",
+      "bbbbbb",
+      "bb",
+      "bbbb",
+      2 },
+    { "b*",
+      "bb",
+      "aaaaa",
+      "bbaaaaa",
+      "bbabbabbabbabbabb",
+      6 },
+    { "b*",
+      "bb",
+      "aa\naa\n",
+      "bbaa\naa\n",
+      "bbabbabb\nbbabbabb\nbb",
+      7 },
+    { "b*",
+      "bb",
+      "aa\raa\r",
+      "bbaa\raa\r",
+      "bbabbabb\rbbabbabb\rbb",
+      7 },
+    { "b*",
+      "bb",
+      "aa\r\naa\r\n",
+      "bbaa\r\naa\r\n",
+      "bbabbabb\r\nbbabbabb\r\nbb",
+      7 },
+    // Check empty-string matching (it's tricky!)
+    { "aa|b*",
+      "@",
+      "aa",
+      "@",
+      "@@",
+      2 },
+    { "b*|aa",
+      "@",
+      "aa",
+      "@aa",
+      "@@@",
+      3 },
+#ifdef SUPPORT_UTF8
+    { "b*",
+      "bb",
+      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
+      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
+      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
+      5 },
+    { "b*",
+      "bb",
+      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
+      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
+      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
+       "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
+      9 },
+#endif
+    { "", NULL, NULL, NULL, NULL, 0 }
+  };
+
+#ifdef SUPPORT_UTF8
+  const bool support_utf8 = true;
+#else
+  const bool support_utf8 = false;
+#endif
+
+  // Walk the table up to the entry whose `original` is NULL.
+  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
+    RE re(t->regexp, RE_Options().set_newline_mode(PCRE2_NEWLINE_CRLF)
+                                 .set_utf(support_utf8));
+    CHECK(re.error().empty());
+    string one(t->original);
+    CHECK(re.Replace(t->rewrite, &one));
+    CHECK_EQ(one, t->single);
+    string all(t->original);
+    const int replace_count = re.GlobalReplace(t->rewrite, &all);
+    CHECK_EQ(all, t->global);
+    CHECK_EQ(replace_count, t->global_count);
+  }
+
+  // One final test: test \r\n replacement when we're not in CRLF mode
+  {
+    RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_CR)
+                            .set_utf(support_utf8));
+    CHECK(re.error().empty());
+    string all("aa\r\naa\r\n");
+    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
+    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
+  }
+  {
+    RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_LF)
+                            .set_utf(support_utf8));
+    CHECK(re.error().empty());
+    string all("aa\r\naa\r\n");
+    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
+    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
+  }
+  // TODO: test what happens when no PCRE2_NEWLINE_* flag is set.
+  // Alas, the answer depends on how pcre was compiled.
+}
+
+// Exercises RE::Extract: a rewrite that swaps two captured groups, a rewrite
+// that uses the whole match (\0), and a non-matching pattern.
+static void TestExtract() {
+  printf("Testing Extract\n");
+
+  string s;
+
+  // Groups may be referenced in any order in the rewrite string.
+  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
+  CHECK_EQ(s, "kremvax!boris");
+
+  // check the RE interface as well
+  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
+  CHECK_EQ(s, "'foo'");
+
+  // When the pattern does not match, Extract reports failure and the output
+  // string is expected to still hold the previous result.
+  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
+  CHECK_EQ(s, "'foo'");
+}
+
+// Exercises RE::Consume: successive calls are expected to pull "aaa" and then
+// "b" off the input, after which the pattern no longer matches.
+static void TestConsume() {
+  printf("Testing Consume\n");
+
+  string s(" aaa b!@#$@#$cccc");
+  StringPiece input(s);
+  string word;
+
+  RE r("\\s*(\\w+)");  // matches a word, possibly preceded by whitespace
+
+  CHECK(r.Consume(&input, &word));
+  CHECK_EQ(word, "aaa");
+
+  CHECK(r.Consume(&input, &word));
+  CHECK_EQ(word, "b");
+
+  CHECK(! r.Consume(&input, &word));
+}
+
+// Exercises RE::FindAndConsume: successive calls are expected to yield "aaa",
+// "b" and "cccc" from the input, and then fail once no word is left.
+static void TestFindAndConsume() {
+  printf("Testing FindAndConsume\n");
+
+  string s(" aaa b!@#$@#$cccc");
+  StringPiece input(s);
+  string word;
+
+  RE r("(\\w+)");  // matches a word
+
+  CHECK(r.FindAndConsume(&input, &word));
+  CHECK_EQ(word, "aaa");
+
+  CHECK(r.FindAndConsume(&input, &word));
+  CHECK_EQ(word, "b");
+
+  CHECK(r.FindAndConsume(&input, &word));
+  CHECK_EQ(word, "cccc");
+
+  CHECK(! r.FindAndConsume(&input, &word));
+}
+
+// With an alternation of capturing groups only one group takes part in a
+// match; the output strings for the other groups are expected to be empty.
+static void TestMatchNumberPeculiarity() {
+  printf("Testing match-number peculiarity\n");
+
+  string word1, word2, word3;
+  RE r("(foo)|(bar)|(baz)");
+
+  // First alternative matches.
+  CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
+  CHECK_EQ(word1, "foo");
+  CHECK_EQ(word2, "");
+  CHECK_EQ(word3, "");
+
+  // Second alternative matches.
+  CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
+  CHECK_EQ(word1, "");
+  CHECK_EQ(word2, "bar");
+  CHECK_EQ(word3, "");
+
+  // Third alternative matches.
+  CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
+  CHECK_EQ(word1, "");
+  CHECK_EQ(word2, "");
+  CHECK_EQ(word3, "baz");
+
+  // No alternative matches.
+  CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
+
+  // The overall match succeeds through the alternative without a group.
+  string a;
+  CHECK(RE("(foo)|hello").FullMatch("hello", &a));
+  CHECK_EQ(a, "");
+}
+
+// Checks the match_limit and match_limit_recursion settings of RE_Options:
+// with generous limits text_good partially matches, with tight limits the
+// same match is expected to be reported as a failure.
+static void TestRecursion() {
+  printf("Testing recursion\n");
+
+  // Get one string that passes (sometimes), one that never does.
+  string text_good("abcdefghijk");
+  string text_bad("acdefghijkl");
+
+  // According to pcretest, matching text_good against (\w+)*b
+  // requires match_limit of at least 8192, and match_recursion_limit
+  // of at least 37.
+
+  // --- match_limit, large enough ---
+  RE_Options options_ml;
+  options_ml.set_match_limit(8192);
+  RE re("(\\w+)*b", options_ml);
+  CHECK(re.PartialMatch(text_good) == true);
+  CHECK(re.PartialMatch(text_bad) == false);
+  CHECK(re.FullMatch(text_good) == false);
+  CHECK(re.FullMatch(text_bad) == false);
+
+  // --- match_limit, too small ---
+  options_ml.set_match_limit(1024);
+  RE re2("(\\w+)*b", options_ml);
+  CHECK(re2.PartialMatch(text_good) == false);  // because of match_limit
+  CHECK(re2.PartialMatch(text_bad) == false);
+  CHECK(re2.FullMatch(text_good) == false);
+  CHECK(re2.FullMatch(text_bad) == false);
+
+  // --- match_limit_recursion, large enough ---
+  RE_Options options_mlr;
+  options_mlr.set_match_limit_recursion(50);
+  RE re3("(\\w+)*b", options_mlr);
+  CHECK(re3.PartialMatch(text_good) == true);
+  CHECK(re3.PartialMatch(text_bad) == false);
+  CHECK(re3.FullMatch(text_good) == false);
+  CHECK(re3.FullMatch(text_bad) == false);
+
+  // --- match_limit_recursion, too small ---
+  options_mlr.set_match_limit_recursion(10);
+  RE re4("(\\w+)*b", options_mlr);
+  CHECK(re4.PartialMatch(text_good) == false);
+  CHECK(re4.PartialMatch(text_bad) == false);
+  CHECK(re4.FullMatch(text_good) == false);
+  CHECK(re4.FullMatch(text_bad) == false);
+}
+
+// Positive QuoteMeta check: the result of RE::QuoteMeta(unquoted), compiled
+// as a pattern with the given options, must fully match `unquoted` itself.
+static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
+  RE re(RE::QuoteMeta(unquoted), options);
+  CHECK(re.FullMatch(unquoted));
+}
+
+// Negative QuoteMeta check: once `unquoted` has been passed through
+// RE::QuoteMeta, its regexp characters are meant to be literal, so the
+// resulting pattern must not fully match `should_not_match`.
+static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
+                                  RE_Options options = RE_Options()) {
+  RE re(RE::QuoteMeta(unquoted), options);
+  CHECK(!re.FullMatch(should_not_match));
+}
+
+// Runs the positive QuoteMeta check over a list of plain C strings containing
+// various regexp metacharacters, plus one string with an embedded NUL byte.
+static void TestQuotaMetaSimple() {
+  static const char* const kInputs[] = {
+    "foo",
+    "foo.bar",
+    "foo\\.bar",
+    "[1-9]",
+    "1.5-2.0?",
+    "\\d",
+    "Who doesn't like ice cream?",
+    "((a|b)c?d*e+[f-h]i)",
+    "((?!)xxx).*yyy",
+    "(["
+  };
+  const size_t kNumInputs = sizeof(kInputs) / sizeof(kInputs[0]);
+  for (size_t idx = 0; idx < kNumInputs; ++idx) {
+    TestQuoteMeta(kInputs[idx]);
+  }
+
+  // The embedded NUL needs an explicit length, so it cannot live in the table.
+  TestQuoteMeta(string("foo\0bar", 7));
+}
+
+// Runs the negative QuoteMeta check over (unquoted, should_not_match) pairs:
+// each second string is one the quoted form of the first must not match.
+static void TestQuoteMetaSimpleNegative() {
+  struct NegativeCase {
+    const char *unquoted;
+    const char *should_not_match;
+  };
+  static const NegativeCase kCases[] = {
+    { "foo", "bar" },
+    { "...", "bar" },
+    { "\\.", "." },
+    { "\\.", ".." },
+    { "(a)", "a" },
+    { "(a|b)", "a" },
+    { "(a|b)", "(a)" },
+    { "(a|b)", "a|b" },
+    { "[0-9]", "0" },
+    { "[0-9]", "0-9" },
+    { "[0-9]", "[9]" },
+    { "((?!)xxx)", "xxx" }
+  };
+  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);
+  for (size_t idx = 0; idx < kNumCases; ++idx) {
+    NegativeTestQuoteMeta(kCases[idx].unquoted, kCases[idx].should_not_match);
+  }
+}
+
+// Positive QuoteMeta check on a string containing the single high byte 0xb2,
+// compiled with default options.
+static void TestQuoteMetaLatin1() {
+  const char *with_high_byte = "3\xb2 = 9";
+  TestQuoteMeta(with_high_byte);
+}
+
+// QuoteMeta checks on UTF-8 input; does nothing unless SUPPORT_UTF8 is
+// defined. Covers 2-, 3- and 4-byte sequences with the UTF8 option, one
+// multi-byte string without it, and one negative case.
+static void TestQuoteMetaUtf8() {
+#ifdef SUPPORT_UTF8
+  // Positive cases compiled with the UTF8 option.
+  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
+  TestQuoteMeta("xyz", pcrecpp::UTF8());               // No fancy utf8
+  TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());          // 2-byte utf8 (degree symbol)
+  TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
+  TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());      // 3-byte utf8 (double prime)
+  TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8());  // 4-byte utf8 (music note)
+
+  // Same bytes without the UTF8 option.
+  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work
+
+  // The quoted degree symbol must not match a subject that has a backslash
+  // in front of each of its two bytes.
+  NegativeTestQuoteMeta("27\xc2\xb0",  // 2-byte utf (degree symbol)
+                        "27\\\xc2\\\xb0",
+                        pcrecpp::UTF8());
+#endif
+}
+
+// Entry point for the QuoteMeta tests: prints the section banner and runs the
+// simple, negative, Latin-1 and UTF-8 groups in that order.
+static void TestQuoteMetaAll() {
+  printf("Testing QuoteMeta\n");
+
+  TestQuotaMetaSimple();
+  TestQuoteMetaSimpleNegative();
+
+  TestQuoteMetaLatin1();
+  TestQuoteMetaUtf8();
+}
+
+//
+// Options tests contributed by
+// Giuseppe Maxia, CTO, Stardata s.r.l.
+// July 2005
+//
+// GetOneOptionResult compiles `regex` with `options`, matches it against
+// `str` (FullMatch when `full` is true, PartialMatch otherwise) while
+// capturing the first group, and requires the captured text to equal
+// `expected`. The match's own return value is not inspected.
+static void GetOneOptionResult(
+    const char *option_name,
+    const char *regex,
+    const char *str,
+    RE_Options options,
+    bool full,
+    string expected) {
+  printf("Testing Option <%s>\n", option_name);
+  if (VERBOSE_TEST) {
+    printf("/%s/ finds \"%s\" within \"%s\" \n",
+           regex, expected.c_str(), str);
+  }
+
+  string captured("");
+  RE compiled(regex, options);
+  if (full) {
+    compiled.FullMatch(str, &captured);
+  } else {
+    compiled.PartialMatch(str, &captured);
+  }
+  CHECK_EQ(captured, expected);
+}
+
+// Compiles `regex` with `options` and matches it against `str` (FullMatch
+// when `full` is true, PartialMatch otherwise). The match must succeed when
+// `assertive` is true and must fail when it is false.
+static void TestOneOption(
+    const char *option_name,
+    const char *regex,
+    const char *str,
+    RE_Options options,
+    bool full,
+    bool assertive = true) {
+  printf("Testing Option <%s>\n", option_name);
+  if (VERBOSE_TEST) {
+    const char *verb = assertive ? "matches" : "doesn't match";
+    printf("'%s' %s /%s/ \n", str, verb, regex);
+  }
+
+  // One CHECK per (assertive, full) combination so that a failure message
+  // names the exact call that was made.
+  if (assertive && full) {
+    CHECK(RE(regex,options).FullMatch(str));
+  } else if (assertive) {
+    CHECK(RE(regex,options).PartialMatch(str));
+  } else if (full) {
+    CHECK(!RE(regex,options).FullMatch(str));
+  } else {
+    CHECK(!RE(regex,options).PartialMatch(str));
+  }
+}
+
+// Caseless matching, enabled three ways: set_caseless() on an existing
+// RE_Options, set_caseless() chained in the argument, and pcrecpp::CASELESS().
+// Ends with a case-sensitive control that must not match.
+static void Test_CASELESS() {
+  RE_Options by_setter;
+  RE_Options by_chain;
+
+  by_setter.set_caseless(true);
+  TestOneOption("CASELESS (class)", "HELLO", "hello", by_setter, false);
+  TestOneOption("CASELESS (class2)", "HELLO", "hello",
+                by_chain.set_caseless(true), false);
+  TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", by_setter, false);
+
+  TestOneOption("CASELESS (function)", "HELLO", "hello",
+                pcrecpp::CASELESS(), false);
+  TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello",
+                pcrecpp::CASELESS(), false);
+
+  // Control: with the option switched back off the match must fail.
+  by_setter.set_caseless(false);
+  TestOneOption("no CASELESS", "HELLO", "hello", by_setter, false, false);
+}
+
+// Multiline matching: "^cruel$" must be found on the middle line of a
+// three-line subject when the option is on, and must not be found when off.
+static void Test_MULTILINE() {
+  const char *str = "HELLO\n" "cruel\n" "world\n";
+  RE_Options by_setter;
+  RE_Options by_chain;
+
+  by_setter.set_multiline(true);
+  TestOneOption("MULTILINE (class)", "^cruel$", str, by_setter, false);
+  TestOneOption("MULTILINE (class2)", "^cruel$", str,
+                by_chain.set_multiline(true), false);
+  TestOneOption("MULTILINE (function)", "^cruel$", str,
+                pcrecpp::MULTILINE(), false);
+
+  // Control: option off, no match expected.
+  by_setter.set_multiline(false);
+  TestOneOption("no MULTILINE", "^cruel$", str, by_setter, false, false);
+}
+
+// Dotall matching: "HELLO.*world" must fully match a subject containing
+// newlines when the option is on, and must not when it is off.
+static void Test_DOTALL() {
+  const char *str = "HELLO\n" "cruel\n" "world";
+  RE_Options by_setter;
+  RE_Options by_chain;
+
+  by_setter.set_dotall(true);
+  TestOneOption("DOTALL (class)", "HELLO.*world", str, by_setter, true);
+  TestOneOption("DOTALL (class2)", "HELLO.*world", str,
+                by_chain.set_dotall(true), true);
+  TestOneOption("DOTALL (function)", "HELLO.*world", str,
+                pcrecpp::DOTALL(), true);
+
+  // Control: option off, no full match expected.
+  by_setter.set_dotall(false);
+  TestOneOption("no DOTALL", "HELLO.*world", str, by_setter, true, false);
+}
+
+// Dollar-endonly: by default "world$" is found in a subject ending in
+// "world\n"; with the option set the same search must fail.
+static void Test_DOLLAR_ENDONLY() {
+  const char *str = "HELLO world\n";
+  RE_Options by_setter;
+  RE_Options by_chain;
+
+  // Default options first: '$' is expected to match before the final newline.
+  TestOneOption("no DOLLAR_ENDONLY", "world$", str, by_setter, false);
+
+  by_setter.set_dollar_endonly(true);
+  TestOneOption("DOLLAR_ENDONLY 1", "world$", str, by_setter, false, false);
+  TestOneOption("DOLLAR_ENDONLY 2", "world$", str,
+                by_chain.set_dollar_endonly(true), false, false);
+}
+
+// Extended mode: with the option on, the pattern "HELLO world" must no longer
+// be found in the subject "HELLO world", while a pattern written with extra
+// spaces and explicit \s+ must be. Ends with a control with the option off.
+static void Test_EXTENDED() {
+  const char *str = "HELLO world";
+  // Pattern laid out with spaces between its pieces.
+  const char *spaced_regex =
+      "^ HE L{2} O "
+      "\\s+ "
+      "\\w+ $ ";
+  RE_Options by_setter;
+  RE_Options by_chain;
+
+  by_setter.set_extended(true);
+  TestOneOption("EXTENDED (class)", "HELLO world", str, by_setter,
+                false, false);
+  TestOneOption("EXTENDED (class2)", "HELLO world", str,
+                by_chain.set_extended(true), false, false);
+  TestOneOption("EXTENDED (class)", spaced_regex, str, by_setter, false);
+
+  TestOneOption("EXTENDED (function)", "HELLO world", str,
+                pcrecpp::EXTENDED(), false, false);
+  TestOneOption("EXTENDED (function)", spaced_regex, str,
+                pcrecpp::EXTENDED(), false);
+
+  // Control: option off, the plain pattern matches again.
+  by_setter.set_extended(false);
+  TestOneOption("no EXTENDED", "HELLO world", str, by_setter, false);
+}
+
+// Checks that parentheses capture text with default options, then repeats the
+// extraction after turning on no_auto_capture in the local RE_Options.
+// NOTE(review): `re` is constructed before set_no_auto_capture(true) is
+// called and is never rebuilt, so the second Extract appears to run against
+// the originally compiled pattern -- confirm whether that is intended.
+static void Test_NO_AUTO_CAPTURE() {
+  const char *str = "HELLO world";
+  RE_Options options;
+  string captured;
+
+  printf("Testing Option <no NO_AUTO_CAPTURE>\n");
+  if (VERBOSE_TEST) {
+    printf("parentheses capture text\n");
+  }
+  RE re("(world|universe)$", options);
+  CHECK(re.Extract("\\1", str , &captured));
+  CHECK_EQ(captured, "world");
+
+  options.set_no_auto_capture(true);
+  printf("testing Option <NO_AUTO_CAPTURE>\n");
+  if (VERBOSE_TEST) {
+    printf("parentheses do not capture text\n");
+  }
+  // Return value deliberately not checked here; only `captured` is.
+  re.Extract("\\1",str, &captured );
+  CHECK_EQ(captured, "world");
+}
+
+// Ungreedy option: with it on, "('.*')" must capture only the first quoted
+// word and "('.*?')" the whole quoted span; with it off the two patterns
+// must swap results.
+static void Test_UNGREEDY() {
+  const char *str = "HELLO, 'this' is the 'world'";
+  RE_Options opts;
+
+  // Option on: plain '*' becomes lazy, '*?' becomes greedy.
+  opts.set_ungreedy(true);
+  GetOneOptionResult("UNGREEDY 1", "('.*')", str, opts, false, "'this'" );
+  GetOneOptionResult("UNGREEDY 2", "('.*')", str,
+                     RE_Options().set_ungreedy(true), false, "'this'" );
+  GetOneOptionResult("UNGREEDY", "('.*?')", str, opts, false,
+                     "'this' is the 'world'" );
+
+  // Option off: the usual greedy / lazy meaning.
+  opts.set_ungreedy(false);
+  GetOneOptionResult("no UNGREEDY", "('.*')", str, opts, false,
+                     "'this' is the 'world'" );
+  GetOneOptionResult("no UNGREEDY", "('.*?')", str, opts, false, "'this'" );
+}
+
+// set_all_options(): sets several PCRE2_* flags at once and checks matching
+// under CASELESS|DOTALL and MULTILINE|EXTENDED, supplying the latter pair via
+// set_all_options(), the RE_Options constructor and chained setters. After
+// each reset to 0 the corresponding pattern must stop matching.
+static void Test_all_options() {
+  const char *str = "HELLO\n" "cruel\n" "world";
+  // Pattern with a space around every piece, used for the EXTENDED cases.
+  const char *spaced_regex = " ^ c r u e l $ ";
+  RE_Options opts;
+
+  opts.set_all_options(PCRE2_CASELESS | PCRE2_DOTALL);
+  TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str,
+                opts, false);
+
+  opts.set_all_options(0);
+  TestOneOption("all_options (0)", "^hello.*WORLD", str, opts, false, false);
+
+  opts.set_all_options(PCRE2_MULTILINE | PCRE2_EXTENDED);
+  TestOneOption("all_options (MULTILINE|EXTENDED)", spaced_regex, str,
+                opts, false);
+  TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
+                spaced_regex, str,
+                RE_Options(PCRE2_MULTILINE | PCRE2_EXTENDED),
+                false);
+  TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
+                spaced_regex, str,
+                RE_Options().set_multiline(true).set_extended(true),
+                false);
+
+  opts.set_all_options(0);
+  TestOneOption("all_options (0)", "^ c r u e l $", str, opts, false, false);
+}
+
+// Entry point for the option tests: prints the section banner and runs every
+// per-option test function in a fixed order.
+static void TestOptions() {
+  printf("Testing Options\n");
+
+  typedef void (*OptionTestFn)();
+  static const OptionTestFn kOptionTests[] = {
+    Test_CASELESS,
+    Test_MULTILINE,
+    Test_DOTALL,
+    Test_DOLLAR_ENDONLY,
+    Test_EXTENDED,
+    Test_NO_AUTO_CAPTURE,
+    Test_UNGREEDY,
+    Test_all_options
+  };
+  const size_t kNumTests = sizeof(kOptionTests) / sizeof(kOptionTests[0]);
+  for (size_t idx = 0; idx < kNumTests; ++idx) {
+    kOptionTests[idx]();
+  }
+}
+
+// Copy construction and assignment of RE: copies of a matching RE must also
+// match, assignment must replace a non-matching RE with a matching one, and
+// self-assignment must leave every object usable.
+static void TestConstructors() {
+  printf("Testing constructors\n");
+
+  const char *str = "HELLO\n" "cruel\n" "world";
+  RE_Options options;
+  options.set_dotall(true);
+
+  // The object every copy below is derived from.
+  RE orig("HELLO.*world", options);
+  CHECK(orig.FullMatch(str));
+
+  // Copy construction.
+  RE copy1(orig);
+  CHECK(copy1.FullMatch(str));
+
+  // Assignment over an RE that held a different pattern.
+  RE copy2("not a match");
+  CHECK(!copy2.FullMatch(str));
+  copy2 = copy1;
+  CHECK(copy2.FullMatch(str));
+  copy2 = orig;
+  CHECK(copy2.FullMatch(str));
+
+  // Make sure when we assign to ourselves, nothing bad happens
+  orig = orig;
+  copy1 = copy1;
+  copy2 = copy2;
+  CHECK(orig.FullMatch(str));
+  CHECK(copy1.FullMatch(str));
+  CHECK(copy2.FullMatch(str));
+}
+
+int main(int argc, char** argv) {
+ // Treat any flag as --help
+ if (argc > 1 && argv[1][0] == '-') {
+ printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
+ " If 'timingX ###' is specified, run the given timing test\n"
+ " with the given number of iterations, rather than running\n"
+ " the default corectness test.\n", argv[0]);
+ return 0;
+ }
+
+ if (argc > 1) {
+ if ( argc == 2 || atoi(argv[2]) == 0) {
+ printf("timing mode needs a num-iters argument\n");
+ return 1;
+ }
+ if (!strcmp(argv[1], "timing1"))
+ Timing1(atoi(argv[2]));
+ else if (!strcmp(argv[1], "timing2"))
+ Timing2(atoi(argv[2]));
+ else if (!strcmp(argv[1], "timing3"))
+ Timing3(atoi(argv[2]));
+ else
+ printf("Unknown argument '%s'\n", argv[1]);
+ return 0;
+ }
+
+ printf("PCRE C++ wrapper tests\n");
+ printf("Testing FullMatch\n");
+
+ int i;
+ string s;
+
+ /***** FullMatch with no args *****/
+
+ CHECK(RE("h.*o").FullMatch("hello"));
+ CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
+ CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
+ CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
+ CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
+ CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
+
+ /***** FullMatch with args *****/
+
+ // Zero-arg
+ CHECK(RE("\\d+").FullMatch("1001"));
+
+ // Single-arg
+ CHECK(RE("(\\d+)").FullMatch("1001", &i));
+ CHECK_EQ(i, 1001);
+ CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
+ CHECK_EQ(i, -123);
+ CHECK(!RE("()\\d+").FullMatch("10", &i));
+ CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
+ &i));
+
+ // Digits surrounding integer-arg
+ CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
+ CHECK_EQ(i, 23);
+ CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
+ CHECK_EQ(i, 1);
+ CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
+ CHECK_EQ(i, -1);
+ CHECK(RE("(\\d)").PartialMatch("1234", &i));
+ CHECK_EQ(i, 1);
+ CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
+ CHECK_EQ(i, -1);
+
+ // String-arg
+ CHECK(RE("h(.*)o").FullMatch("hello", &s));
+ CHECK_EQ(s, string("ell"));
+
+ // StringPiece-arg
+ StringPiece sp;
+ CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
+ CHECK_EQ(sp.size(), 4);
+ CHECK(memcmp(sp.data(), "ruby", 4) == 0);
+ CHECK_EQ(i, 1234);
+
+ // Multi-arg
+ CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
+ CHECK_EQ(s, string("ruby"));
+ CHECK_EQ(i, 1234);
+
+ // Ignore non-void* NULL arg
+ CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
+ CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
+ CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
+ CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
+#ifdef HAVE_LONG_LONG
+ CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
+#endif
+ CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
+ CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
+
+ // Fail on non-void* NULL arg if the match doesn't parse for the given type.
+ CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
+ CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
+ CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
+ CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
+ CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
+
+ // Ignored arg
+ CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
+ CHECK_EQ(s, string("ruby"));
+ CHECK_EQ(i, 1234);
+
+ // Type tests
+ {
+ char c;
+ CHECK(RE("(H)ello").FullMatch("Hello", &c));
+ CHECK_EQ(c, 'H');
+ }
+ {
+ unsigned char c;
+ CHECK(RE("(H)ello").FullMatch("Hello", &c));
+ CHECK_EQ(c, static_cast<unsigned char>('H'));
+ }
+ {
+ short v;
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
+ CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
+ CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
+ CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
+ CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
+ }
+ {
+ unsigned short v;
+ CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
+ CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
+ CHECK(!RE("(\\d+)").FullMatch("65536", &v));
+ }
+ {
+ int v;
+ static const int max_value = 0x7fffffff;
+ static const int min_value = -max_value - 1;
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
+ CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
+ CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
+ CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
+ }
+ {
+ unsigned int v;
+ static const unsigned int max_value = 0xfffffffful;
+ CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
+ CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
+ }
+#ifdef HAVE_LONG_LONG
+# if defined(__MINGW__) || defined(__MINGW32__)
+# define LLD "%I64d"
+# define LLU "%I64u"
+# else
+# define LLD "%lld"
+# define LLU "%llu"
+# endif
+ {
+ long long v;
+ static const long long max_value = 0x7fffffffffffffffLL;
+ static const long long min_value = -max_value - 1;
+ char buf[32]; // definitely big enough for a long long
+
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
+
+ sprintf(buf, LLD, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
+
+ sprintf(buf, LLD, min_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
+
+ sprintf(buf, LLD, max_value);
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+
+ sprintf(buf, LLD, min_value);
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+ }
+#endif
+#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
+ {
+ unsigned long long v;
+ long long v2;
+ static const unsigned long long max_value = 0xffffffffffffffffULL;
+ char buf[32]; // definitely big enough for a unsigned long long
+
+ CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
+
+ sprintf(buf, LLU, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
+
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+ }
+#endif
+ {
+ float v;
+ CHECK(RE("(.*)").FullMatch("100", &v));
+ CHECK(RE("(.*)").FullMatch("-100.", &v));
+ CHECK(RE("(.*)").FullMatch("1e23", &v));
+ }
+ {
+ double v;
+ CHECK(RE("(.*)").FullMatch("100", &v));
+ CHECK(RE("(.*)").FullMatch("-100.", &v));
+ CHECK(RE("(.*)").FullMatch("1e23", &v));
+ }
+
+ // Check that matching is fully anchored
+ CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
+ CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
+ CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
+ CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
+
+ // Braces
+ CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
+ CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
+ CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
+
+ // Complicated RE
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
+ CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
+
+ // Check full-match handling (needs '$' tacked on internally)
+ CHECK(RE("fo|foo").FullMatch("fo"));
+ CHECK(RE("fo|foo").FullMatch("foo"));
+ CHECK(RE("fo|foo$").FullMatch("fo"));
+ CHECK(RE("fo|foo$").FullMatch("foo"));
+ CHECK(RE("foo$").FullMatch("foo"));
+ CHECK(!RE("foo\\$").FullMatch("foo$bar"));
+ CHECK(!RE("fo|bar").FullMatch("fox"));
+
+ // Uncomment the following if we change the handling of '$' to
+ // prevent it from matching a trailing newline
+ if (false) {
+ // Check that we don't get bitten by pcre's special handling of a
+ // '\n' at the end of the string matching '$'
+ CHECK(!RE("foo$").PartialMatch("foo\n"));
+ }
+
+ // Number of args
+ int a[16];
+ CHECK(RE("").FullMatch(""));
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d){1}").FullMatch("1",
+ &a[0]));
+ CHECK_EQ(a[0], 1);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)").FullMatch("12",
+ &a[0], &a[1]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
+ &a[0], &a[1], &a[2]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
+ &a[0], &a[1], &a[2], &a[3]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
+ &a[0], &a[1], &a[2],
+ &a[3], &a[4]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
+ &a[0], &a[1], &a[2],
+ &a[3], &a[4], &a[5]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
+ &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+ CHECK_EQ(a[6], 7);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
+ "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
+ "1234567890123456",
+ &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7],
+ &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+ CHECK_EQ(a[6], 7);
+ CHECK_EQ(a[7], 8);
+ CHECK_EQ(a[8], 9);
+ CHECK_EQ(a[9], 0);
+ CHECK_EQ(a[10], 1);
+ CHECK_EQ(a[11], 2);
+ CHECK_EQ(a[12], 3);
+ CHECK_EQ(a[13], 4);
+ CHECK_EQ(a[14], 5);
+ CHECK_EQ(a[15], 6);
+
+ /***** PartialMatch *****/
+
+ printf("Testing PartialMatch\n");
+
+ CHECK(RE("h.*o").PartialMatch("hello"));
+ CHECK(RE("h.*o").PartialMatch("othello"));
+ CHECK(RE("h.*o").PartialMatch("hello!"));
+ CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
+
+ /***** other tests *****/
+
+ RadixTests();
+ TestReplace();
+ TestExtract();
+ TestConsume();
+ TestFindAndConsume();
+ TestQuoteMetaAll();
+ TestMatchNumberPeculiarity();
+
+ // Check the pattern() accessor
+ {
+ const string kPattern = "http://([^/]+)/.*";
+ const RE re(kPattern);
+ CHECK_EQ(kPattern, re.pattern());
+ }
+
+ // Check RE error field.
+ {
+ RE re("foo");
+ CHECK(re.error().empty()); // Must have no error
+ }
+
+#ifdef SUPPORT_UTF8
+ // Check UTF-8 handling
+ {
+ printf("Testing UTF-8 handling\n");
+
+ // Three Japanese characters (nihongo)
+ const unsigned char utf8_string[] = {
+ 0xe6, 0x97, 0xa5, // 65e5
+ 0xe6, 0x9c, 0xac, // 672c
+ 0xe8, 0xaa, 0x9e, // 8a9e
+ 0
+ };
+ const unsigned char utf8_pattern[] = {
+ '.',
+ 0xe6, 0x9c, 0xac, // 672c
+ '.',
+ 0
+ };
+
+ // Both should match in either mode, bytes or UTF-8
+ RE re_test1(".........");
+ CHECK(re_test1.FullMatch(utf8_string));
+ RE re_test2("...", pcrecpp::UTF8());
+ CHECK(re_test2.FullMatch(utf8_string));
+
+ // Check that '.' matches one byte or UTF-8 character
+ // according to the mode.
+ string ss;
+ RE re_test3("(.)");
+ CHECK(re_test3.PartialMatch(utf8_string, &ss));
+ CHECK_EQ(ss, string("\xe6"));
+ RE re_test4("(.)", pcrecpp::UTF8());
+ CHECK(re_test4.PartialMatch(utf8_string, &ss));
+ CHECK_EQ(ss, string("\xe6\x97\xa5"));
+
+ // Check that string matches itself in either mode
+ RE re_test5(utf8_string);
+ CHECK(re_test5.FullMatch(utf8_string));
+ RE re_test6(utf8_string, pcrecpp::UTF8());
+ CHECK(re_test6.FullMatch(utf8_string));
+
+ // Check that pattern matches string only in UTF8 mode
+ RE re_test7(utf8_pattern);
+ CHECK(!re_test7.FullMatch(utf8_string));
+ RE re_test8(utf8_pattern, pcrecpp::UTF8());
+ CHECK(re_test8.FullMatch(utf8_string));
+ }
+
+ // Check that ungreedy, UTF8 regular expressions don't match when they
+ // oughtn't -- see bug 82246.
+ {
+ // This code always worked.
+ const char* pattern = "\\w+X";
+ const string target = "a aX";
+ RE match_sentence(pattern);
+ RE match_sentence_re(pattern, pcrecpp::UTF8());
+
+ CHECK(!match_sentence.FullMatch(target));
+ CHECK(!match_sentence_re.FullMatch(target));
+ }
+
+ {
+ const char* pattern = "(?U)\\w+X";
+ const string target = "a aX";
+ RE match_sentence(pattern);
+ RE match_sentence_re(pattern, pcrecpp::UTF8());
+
+ CHECK(!match_sentence.FullMatch(target));
+ CHECK(!match_sentence_re.FullMatch(target));
+ }
+#endif /* def SUPPORT_UTF8 */
+
+ printf("Testing error reporting\n");
+
+ { RE re("a\\1"); CHECK(!re.error().empty()); }
+ {
+ RE re("a[x");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a[z-a]");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a[[:foobar:]]");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a(b");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a\\");
+ CHECK(!re.error().empty());
+ }
+
+ // Test that recursion is stopped
+ TestRecursion();
+
+ // Test Options
+ if (getenv("VERBOSE_TEST") != NULL)
+ VERBOSE_TEST = true;
+ TestOptions();
+
+ // Test the constructors
+ TestConstructors();
+
+ // Done
+ printf("OK\n");
+
+ return 0;
+}