aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAddison Crump <addison.crump@cispa.de>2024-02-15 18:02:49 +0100
committerGitHub <noreply@github.com>2024-02-15 17:02:49 +0000
commitbd1eec778610f8f4c8f07277e1b63ce9bf876888 (patch)
tree0ebf8df6f7af4857d71b7607f53cf2c26999bcc3
parentaa51f007394b57a569db2fc70f25ff6a1334543a (diff)
downloadpcre-bd1eec778610f8f4c8f07277e1b63ce9bf876888.tar.gz
Fuzzer: fix JIT fuzzing (#322)
* include config, enrich differential output * fix additional misunderstanding * improve output for clarity * ignore callout errors * support 8-, 16-, and 32-bit modes * try to enable MSAN support with JIT * expand stack size implicitly * use 256MB stack (!) to avoid overflow with link-size 4 * try to make some dictionaries for 16-, 32-bit modes * add to options * disable recurseloop check in fuzzer
-rw-r--r--.gitignore3
-rw-r--r--Makefile.am52
-rw-r--r--pcre2_fuzzer_16.dict50
-rw-r--r--pcre2_fuzzer_16.options2
-rw-r--r--pcre2_fuzzer_32.dict50
-rw-r--r--pcre2_fuzzer_32.options2
-rw-r--r--src/pcre2_fuzzsupport.c334
-rw-r--r--src/pcre2_jit_compile.c8
-rw-r--r--src/pcre2_jit_match.c13
-rw-r--r--src/sljit/sljitConfigInternal.h13
-rw-r--r--src/sljit/sljitNativeX86_common.c13
11 files changed, 449 insertions, 91 deletions
diff --git a/.gitignore b/.gitignore
index e55689a2..b69a1d82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,7 +50,8 @@ pcre2posix_test.exe
pcre2posix_test.log
pcre2posix_test.trs
pcre2demo
-pcre2fuzzcheck
+pcre2fuzzcheck-*
+pcre2fuzzer-*
pcre2grep
pcre2grep.exe
pcre2test
diff --git a/Makefile.am b/Makefile.am
index 13d1bc8c..3e3eeb4f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -540,28 +540,64 @@ if WITH_GCOV
pcre2grep_CFLAGS += $(GCOV_CFLAGS)
pcre2grep_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
+endif # WITH_PCRE2_8
## If fuzzer support is enabled, build a non-distributed library containing the
## fuzzing function. Also build the standalone checking binary from the same
## source but using -DSTANDALONE.
if WITH_FUZZ_SUPPORT
-noinst_LIBRARIES = .libs/libpcre2-fuzzsupport.a
+noinst_LIBRARIES =
+if WITH_PCRE2_8
+noinst_LIBRARIES += .libs/libpcre2-fuzzsupport.a
_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c
_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS)
_libs_libpcre2_fuzzsupport_a_LIBADD =
-noinst_PROGRAMS += pcre2fuzzcheck
-pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c
-pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
-pcre2fuzzcheck_LDADD = libpcre2-8.la
+noinst_PROGRAMS += pcre2fuzzcheck-8
+pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c
+pcre2fuzzcheck_8_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
+pcre2fuzzcheck_8_LDADD = libpcre2-8.la
if WITH_GCOV
-pcre2fuzzcheck_CFLAGS += $(GCOV_CFLAGS)
-pcre2fuzzcheck_LDADD += $(GCOV_LIBS)
+pcre2fuzzcheck_8_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_8_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
-endif # WITH FUZZ_SUPPORT
endif # WITH_PCRE2_8
+if WITH_PCRE2_16
+noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-16.a
+_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c
+_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16
+_libs_libpcre2_fuzzsupport_16_a_LIBADD =
+
+noinst_PROGRAMS += pcre2fuzzcheck-16
+pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c
+pcre2fuzzcheck_16_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16
+pcre2fuzzcheck_16_LDADD = libpcre2-16.la
+if WITH_GCOV
+pcre2fuzzcheck_16_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_16_LDADD += $(GCOV_LIBS)
+endif # WITH_GCOV
+endif # WITH_PCRE2_16
+
+if WITH_PCRE2_32
+noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-32.a
+_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c
+_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32
+_libs_libpcre2_fuzzsupport_32_a_LIBADD =
+
+noinst_PROGRAMS += pcre2fuzzcheck-32
+pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c
+pcre2fuzzcheck_32_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32
+pcre2fuzzcheck_32_LDADD = libpcre2-32.la
+if WITH_GCOV
+pcre2fuzzcheck_32_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_32_LDADD += $(GCOV_LIBS)
+endif # WITH_GCOV
+endif # WITH_PCRE2_32
+
+endif # WITH_FUZZ_SUPPORT
+
## -------- Testing ----------
## If the 8-bit library is enabled, build the POSIX wrapper test program and
diff --git a/pcre2_fuzzer_16.dict b/pcre2_fuzzer_16.dict
new file mode 100644
index 00000000..100a5b71
--- /dev/null
+++ b/pcre2_fuzzer_16.dict
@@ -0,0 +1,50 @@
+# This is an attempt at a fuzzer dictionary for PCRE2 in 16-bit mode (each entry is a sequence of little-endian 16-bit code units).
+
+"\\\x00A\x00"
+"\\\x00b\x00"
+"\\\x00B\x00"
+"\\\x00d\x00"
+"\\\x00D\x00"
+"\\\x00h\x00"
+"\\\x00H\x00"
+"\\\x00n\x00"
+"\\\x00N\x00"
+"\\\x00s\x00"
+"\\\x00S\x00"
+"\\\x00w\x00"
+"\\\x00W\x00"
+"\\\x00z\x00"
+"\\\x00Z\x00"
+
+"(\x00?\x00"
+"(\x00?\x00:\x00"
+"(\x00?\x00>\x00"
+"(\x00?\x00=\x00"
+"(\x00?\x00!\x00"
+"(\x00?\x00<\x00=\x00"
+"(\x00?\x00<\x00!\x00"
+"(\x00?\x00|\x00"
+
+"[\x00:\x00a\x00l\x00n\x00u\x00m\x00:\x00]\x00"
+"[\x00:\x00a\x00l\x00p\x00h\x00a\x00:\x00]\x00"
+"[\x00:\x00a\x00s\x00c\x00i\x00i\x00:\x00]\x00"
+"[\x00:\x00b\x00l\x00a\x00n\x00k\x00:\x00]\x00"
+"[\x00:\x00c\x00n\x00t\x00r\x00l\x00:\x00]\x00"
+"[\x00:\x00d\x00i\x00g\x00i\x00t\x00:\x00]\x00"
+"[\x00:\x00g\x00r\x00a\x00p\x00h\x00:\x00]\x00"
+"[\x00:\x00l\x00o\x00w\x00e\x00r\x00:\x00]\x00"
+"[\x00:\x00p\x00r\x00i\x00n\x00t\x00:\x00]\x00"
+"[\x00:\x00p\x00u\x00n\x00c\x00t\x00:\x00]\x00"
+"[\x00:\x00s\x00p\x00a\x00c\x00e\x00:\x00]\x00"
+"[\x00:\x00u\x00p\x00p\x00e\x00r\x00:\x00]\x00"
+"[\x00:\x00w\x00o\x00r\x00d\x00:\x00]\x00"
+"[\x00:\x00x\x00d\x00i\x00g\x00i\x00t\x00:\x00]\x00"
+
+"(\x00*\x00A\x00C\x00C\x00E\x00P\x00T\x00)\x00"
+"(\x00*\x00F\x00A\x00I\x00L\x00)\x00"
+"(\x00*\x00C\x00O\x00M\x00M\x00I\x00T\x00)\x00"
+"(\x00*\x00P\x00R\x00U\x00N\x00E\x00)\x00"
+"(\x00*\x00S\x00K\x00I\x00P\x00)\x00"
+"(\x00*\x00T\x00H\x00E\x00N\x00)\x00"
+
+# End
diff --git a/pcre2_fuzzer_16.options b/pcre2_fuzzer_16.options
new file mode 100644
index 00000000..1b6d2e67
--- /dev/null
+++ b/pcre2_fuzzer_16.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+dict = pcre2_fuzzer_16.dict
diff --git a/pcre2_fuzzer_32.dict b/pcre2_fuzzer_32.dict
new file mode 100644
index 00000000..8b962d5d
--- /dev/null
+++ b/pcre2_fuzzer_32.dict
@@ -0,0 +1,50 @@
+# This is an attempt at a fuzzer dictionary for PCRE2 in 32-bit mode (each entry is a sequence of little-endian 32-bit code units).
+
+"\\\x00\x00\x00A\x00\x00\x00"
+"\\\x00\x00\x00b\x00\x00\x00"
+"\\\x00\x00\x00B\x00\x00\x00"
+"\\\x00\x00\x00d\x00\x00\x00"
+"\\\x00\x00\x00D\x00\x00\x00"
+"\\\x00\x00\x00h\x00\x00\x00"
+"\\\x00\x00\x00H\x00\x00\x00"
+"\\\x00\x00\x00n\x00\x00\x00"
+"\\\x00\x00\x00N\x00\x00\x00"
+"\\\x00\x00\x00s\x00\x00\x00"
+"\\\x00\x00\x00S\x00\x00\x00"
+"\\\x00\x00\x00w\x00\x00\x00"
+"\\\x00\x00\x00W\x00\x00\x00"
+"\\\x00\x00\x00z\x00\x00\x00"
+"\\\x00\x00\x00Z\x00\x00\x00"
+
+"(\x00\x00\x00?\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00:\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00>\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00=\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00!\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00<\x00\x00\x00=\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00<\x00\x00\x00!\x00\x00\x00"
+"(\x00\x00\x00?\x00\x00\x00|\x00\x00\x00"
+
+"[\x00\x00\x00:\x00\x00\x00a\x00\x00\x00l\x00\x00\x00n\x00\x00\x00u\x00\x00\x00m\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00a\x00\x00\x00l\x00\x00\x00p\x00\x00\x00h\x00\x00\x00a\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00a\x00\x00\x00s\x00\x00\x00c\x00\x00\x00i\x00\x00\x00i\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00b\x00\x00\x00l\x00\x00\x00a\x00\x00\x00n\x00\x00\x00k\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00c\x00\x00\x00n\x00\x00\x00t\x00\x00\x00r\x00\x00\x00l\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00d\x00\x00\x00i\x00\x00\x00g\x00\x00\x00i\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00g\x00\x00\x00r\x00\x00\x00a\x00\x00\x00p\x00\x00\x00h\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00l\x00\x00\x00o\x00\x00\x00w\x00\x00\x00e\x00\x00\x00r\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00p\x00\x00\x00r\x00\x00\x00i\x00\x00\x00n\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00p\x00\x00\x00u\x00\x00\x00n\x00\x00\x00c\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00c\x00\x00\x00e\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00u\x00\x00\x00p\x00\x00\x00p\x00\x00\x00e\x00\x00\x00r\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00w\x00\x00\x00o\x00\x00\x00r\x00\x00\x00d\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+"[\x00\x00\x00:\x00\x00\x00x\x00\x00\x00d\x00\x00\x00i\x00\x00\x00g\x00\x00\x00i\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00"
+
+"(\x00\x00\x00*\x00\x00\x00A\x00\x00\x00C\x00\x00\x00C\x00\x00\x00E\x00\x00\x00P\x00\x00\x00T\x00\x00\x00)\x00\x00\x00"
+"(\x00\x00\x00*\x00\x00\x00F\x00\x00\x00A\x00\x00\x00I\x00\x00\x00L\x00\x00\x00)\x00\x00\x00"
+"(\x00\x00\x00*\x00\x00\x00C\x00\x00\x00O\x00\x00\x00M\x00\x00\x00M\x00\x00\x00I\x00\x00\x00T\x00\x00\x00)\x00\x00\x00"
+"(\x00\x00\x00*\x00\x00\x00P\x00\x00\x00R\x00\x00\x00U\x00\x00\x00N\x00\x00\x00E\x00\x00\x00)\x00\x00\x00"
+"(\x00\x00\x00*\x00\x00\x00S\x00\x00\x00K\x00\x00\x00I\x00\x00\x00P\x00\x00\x00)\x00\x00\x00"
+"(\x00\x00\x00*\x00\x00\x00T\x00\x00\x00H\x00\x00\x00E\x00\x00\x00N\x00\x00\x00)\x00\x00\x00"
+
+# End
diff --git a/pcre2_fuzzer_32.options b/pcre2_fuzzer_32.options
new file mode 100644
index 00000000..14a007a4
--- /dev/null
+++ b/pcre2_fuzzer_32.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+dict = pcre2_fuzzer_32.dict
diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c
index a2585180..7decec55 100644
--- a/src/pcre2_fuzzsupport.c
+++ b/src/pcre2_fuzzsupport.c
@@ -13,8 +13,19 @@ Written by Philip Hazel, October 2016
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+/* stack size adjustment */
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#define STACK_SIZE_MB 256
+
+#ifndef PCRE2_CODE_UNIT_WIDTH
#define PCRE2_CODE_UNIT_WIDTH 8
+#endif
+
+#include "config.h"
#include "pcre2.h"
#define MAX_MATCH_SIZE 1000
@@ -36,6 +47,165 @@ Written by Philip Hazel, October 2016
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT)
+/* Write one line to the given stream describing a set of compile options: the
+option word as eight hex digits, the fixed label "never_backslash_c", and then
+",name" for every listed option bit that is set in the word. */
+
+static void print_compile_options(FILE *stream, uint32_t compile_options)
+{
+/* Listed in the order in which the names are printed. */
+static const struct { uint32_t bit; const char *text; } option_names[] = {
+  { PCRE2_ALT_BSUX,            ",alt_bsux" },
+  { PCRE2_ALT_CIRCUMFLEX,      ",alt_circumflex" },
+  { PCRE2_ALT_VERBNAMES,       ",alt_verbnames" },
+  { PCRE2_ALLOW_EMPTY_CLASS,   ",allow_empty_class" },
+  { PCRE2_ANCHORED,            ",anchored" },
+  { PCRE2_AUTO_CALLOUT,        ",auto_callout" },
+  { PCRE2_CASELESS,            ",caseless" },
+  { PCRE2_DOLLAR_ENDONLY,      ",dollar_endonly" },
+  { PCRE2_DOTALL,              ",dotall" },
+  { PCRE2_DUPNAMES,            ",dupnames" },
+  { PCRE2_ENDANCHORED,         ",endanchored" },
+  { PCRE2_EXTENDED,            ",extended" },
+  { PCRE2_FIRSTLINE,           ",firstline" },
+  { PCRE2_MATCH_UNSET_BACKREF, ",match_unset_backref" },
+  { PCRE2_MULTILINE,           ",multiline" },
+  { PCRE2_NEVER_UCP,           ",never_ucp" },
+  { PCRE2_NEVER_UTF,           ",never_utf" },
+  { PCRE2_NO_AUTO_CAPTURE,     ",no_auto_capture" },
+  { PCRE2_NO_AUTO_POSSESS,     ",no_auto_possess" },
+  { PCRE2_NO_DOTSTAR_ANCHOR,   ",no_dotstar_anchor" },
+  { PCRE2_NO_UTF_CHECK,        ",no_utf_check" },
+  { PCRE2_NO_START_OPTIMIZE,   ",no_start_optimize" },
+  { PCRE2_UCP,                 ",ucp" },
+  { PCRE2_UNGREEDY,            ",ungreedy" },
+  { PCRE2_USE_OFFSET_LIMIT,    ",use_offset_limit" },
+  { PCRE2_UTF,                 ",utf" }
+};
+
+fprintf(stream, "Compile options %.8x never_backslash_c", compile_options);
+for (size_t n = 0; n < sizeof(option_names)/sizeof(option_names[0]); n++)
+  {
+  if ((compile_options & option_names[n].bit) != 0)
+    fputs(option_names[n].text, stream);
+  }
+fputc('\n', stream);
+}
+
+/* Write one line to the given stream describing a set of match options: the
+option word as eight hex digits, then ",name" for every listed option bit that
+is set in the word. */
+
+static void print_match_options(FILE *stream, uint32_t match_options)
+{
+/* Listed in the order in which the names are printed. */
+static const struct { uint32_t bit; const char *text; } option_names[] = {
+  { PCRE2_ANCHORED,         ",anchored" },
+  { PCRE2_ENDANCHORED,      ",endanchored" },
+  { PCRE2_NO_UTF_CHECK,     ",no_utf_check" },
+  { PCRE2_NOTBOL,           ",notbol" },
+  { PCRE2_NOTEMPTY,         ",notempty" },
+  { PCRE2_NOTEMPTY_ATSTART, ",notempty_atstart" },
+  { PCRE2_NOTEOL,           ",noteol" },
+  { PCRE2_PARTIAL_HARD,     ",partial_hard" },
+  { PCRE2_PARTIAL_SOFT,     ",partial_soft" }
+};
+
+fprintf(stream, "Match options %.8x", match_options);
+for (size_t n = 0; n < sizeof(option_names)/sizeof(option_names[0]); n++)
+  {
+  if ((match_options & option_names[n].bit) != 0)
+    fputs(option_names[n].text, stream);
+  }
+fputc('\n', stream);
+}
+
+/* Print the captured substrings of a match to the given stream, one line per
+group, with every code unit written in hex.
+
+  stream         where the report is written
+  count          number of groups to dump, numbered 0 .. count-1; a value
+                   of zero or less prints nothing
+  match_data     match data block the substrings are extracted from
+  match_context  not used here; kept so that callers need not change
+
+A group that cannot be extracted (pcre2_substring_get_bynumber() returns a
+negative code) is reported with that error instead. The error message text is
+only looked up in 8-bit builds; other widths print the numeric code alone. */
+
+static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data, pcre2_match_context *match_context)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+PCRE2_UCHAR error_buf[256];
+#endif
+int errorcode;
+
+(void)match_context;
+
+/* A signed index keeps the comparison with "count" signed, so a negative
+count ends the loop at once instead of being converted to a huge unsigned
+bound. */
+
+for (int index = 0; index < count; index++)
+  {
+  PCRE2_UCHAR *bufferptr = NULL;
+  PCRE2_SIZE bufflen = 0;
+
+  errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t)index, &bufferptr, &bufflen);
+
+  if (errorcode >= 0)
+    {
+    fprintf(stream, "Match %d (hex encoded): ", index);
+    for (PCRE2_SIZE i = 0; i < bufflen; i++)
+      {
+      fprintf(stream, "%02x", (unsigned int)bufferptr[i]);
+      }
+    fprintf(stream, "\n");
+
+    /* The substring was allocated for us by the library; give it back. */
+    pcre2_substring_free(bufferptr);
+    }
+  else
+    {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    pcre2_get_error_message(errorcode, error_buf, 256);
+    fprintf(stream, "Match %d failed: %s\n", index, (const char *)error_buf);
+#else
+    fprintf(stream, "Match %d failed: %d\n", index, errorcode);
+#endif
+    }
+  }
+}
+
+/* Report a disagreement between the non-JIT and JIT runs of the current test
+case, then abort. Everything is written to stderr: the name of the failing
+check, the input in hex, the compile and match options, and for each of the two
+runs its error and/or the substrings it captured. This function does not
+return.
+
+  task             short description of the check that failed
+  data             start of the pattern/subject bytes
+  size             length of the input in code units, the same value the
+                     caller hands to pcre2_compile(); it is scaled back to
+                     bytes here so that the whole input is dumped in 16-bit
+                     and 32-bit builds as well
+  compile_options  printed with print_compile_options()
+  match_options    printed with print_match_options()
+  errorcode        non-JIT result; reported as an error when negative
+  errorcode_jit    JIT result; reported as an error when negative
+  matches          non-JIT group count; groups are dumped when >= 0
+  matches_jit      JIT group count; groups are dumped when >= 0
+  match_data       non-JIT match data, or NULL to skip the dump
+  match_data_jit   JIT match data, or NULL to skip the dump
+  match_context    passed through to dump_matches()
+
+Error message text is only looked up in 8-bit builds; other code unit widths
+print the numeric code alone. */
+
+#ifdef SUPPORT_JIT
+static void describe_failure(
+  const char *task,
+  const unsigned char *data,
+  size_t size,
+  uint32_t compile_options,
+  uint32_t match_options,
+  int errorcode,
+  int errorcode_jit,
+  int matches,
+  int matches_jit,
+  pcre2_match_data *match_data,
+  pcre2_match_data *match_data_jit,
+  pcre2_match_context *match_context
+) {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+PCRE2_UCHAR buffer[256];
+#endif
+
+/* "data" is a byte pointer but "size" counts code units. */
+const size_t nbytes = size * (PCRE2_CODE_UNIT_WIDTH / 8);
+
+fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
+
+fprintf(stderr, "Pattern/sample string (hex encoded): ");
+for (size_t i = 0; i < nbytes; i++)
+  {
+  fprintf(stderr, "%02x", data[i]);
+  }
+fprintf(stderr, "\n");
+
+print_compile_options(stderr, compile_options);
+print_match_options(stderr, match_options);
+
+if (errorcode < 0)
+  {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  pcre2_get_error_message(errorcode, buffer, 256);
+  fprintf(stderr, "Non-JIT'd operation emitted an error: %s (%d)\n", (const char *)buffer, errorcode);
+#else
+  fprintf(stderr, "Non-JIT'd operation emitted an error: %d\n", errorcode);
+#endif
+  }
+if (matches >= 0)
+  {
+  fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
+  if (match_data != NULL)
+    {
+    fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
+    dump_matches(stderr, matches, match_data, match_context);
+    fprintf(stderr, "\n");
+    }
+  }
+
+if (errorcode_jit < 0)
+  {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  pcre2_get_error_message(errorcode_jit, buffer, 256);
+  fprintf(stderr, "JIT'd operation emitted an error: %s (%d)\n", (const char *)buffer, errorcode_jit);
+#else
+  /* Report the JIT result here, not the non-JIT one. */
+  fprintf(stderr, "JIT'd operation emitted an error: %d\n", errorcode_jit);
+#endif
+  }
+if (matches_jit >= 0)
+  {
+  fprintf(stderr, "JIT'd operation did not emit an error.\n");
+  if (match_data_jit != NULL)
+    {
+    fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
+    dump_matches(stderr, matches_jit, match_data_jit, match_context);
+    fprintf(stderr, "\n");
+    }
+  }
+
+abort();
+}
+#endif
+
/* This is the callout function. Its only purpose is to halt matching if there
are more than 100 callouts, as one way of stopping too much time being spent on
fruitless matches. The callout data is a pointer to the counter. */
@@ -50,8 +220,31 @@ return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
"no previous prototype" warning when compiling at high warning level. */
+int LLVMFuzzerInitialize(int *, char ***);
+
int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
+/* Start-up hook: set the soft stack size limit of the process to
+STACK_SIZE_MB MiB. It is also called directly from this file's main().
+
+  argc, argv   not used
+
+Returns 0 on success. If the current limits cannot be read, the hard limit is
+below the requested size, or the new limit is rejected, a message is written to
+stderr and the process exits with status 1. */
+
+int LLVMFuzzerInitialize(int *argc, char ***argv)
+{
+struct rlimit rlim;
+
+(void)argc;
+(void)argv;
+
+/* Without this check a failed call would leave "rlim" uninitialised. */
+if (getrlimit(RLIMIT_STACK, &rlim) != 0)
+  {
+  fprintf(stderr, "failed to read stack size limit\n");
+  _exit(1);
+  }
+
+/* Widen before multiplying so that a larger STACK_SIZE_MB cannot overflow
+int arithmetic. */
+rlim.rlim_cur = (rlim_t)STACK_SIZE_MB * 1024 * 1024;
+if (rlim.rlim_cur > rlim.rlim_max)
+  {
+  fprintf(stderr, "hard stack size limit is too small (needed %dMiB)!\n", STACK_SIZE_MB);
+  _exit(1);
+  }
+if (setrlimit(RLIMIT_STACK, &rlim) != 0)
+  {
+  fprintf(stderr, "failed to expand stack size\n");
+  _exit(1);
+  }
+
+return 0;
+}
+
/* Here's the driving function. */
int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
@@ -76,6 +269,7 @@ in large trees taking too much time. */
random_options = *(uint64_t *)(data);
data += sizeof(random_options);
size -= sizeof(random_options);
+size /= PCRE2_CODE_UNIT_WIDTH / 8;
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
@@ -87,7 +281,9 @@ reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
PCRE2_NEVER_BACKSLASH_C;
-match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) | PCRE2_NO_JIT;
+match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
+ PCRE2_NO_JIT |
+ PCRE2_DISABLE_RECURSELOOP_CHECK;
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
allowed together and just give an immediate error return. */
@@ -104,40 +300,14 @@ for (i = 0; i < 2; i++)
int errorcode;
#ifdef SUPPORT_JIT
int errorcode_jit;
- uint32_t ovector_count;
+ int matches = 0;
+ int matches_jit = 0;
#endif
PCRE2_SIZE erroroffset;
pcre2_code *code;
#ifdef STANDALONE
- printf("Compile options %.8x never_backslash_c", compile_options);
- printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
- ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
- ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
- ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
- ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
- ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
- ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
- ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
- ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
- ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
- ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
- ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
- ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
- ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
- ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
- ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
- ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
- ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
- ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
- ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
- ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
- ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
- ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
- ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
- ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
- ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
+ print_compile_options(stdout, compile_options);
#endif
code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
@@ -169,7 +339,7 @@ for (i = 0; i < 2; i++)
#endif
{
#ifdef STANDALONE
- printf("** Failed to create match data block\n");
+ fprintf(stderr, "** Failed to create match data block\n");
#endif
abort();
}
@@ -181,7 +351,7 @@ for (i = 0; i < 2; i++)
if (match_context == NULL)
{
#ifdef STANDALONE
- printf("** Failed to create match context block\n");
+ fprintf(stderr, "** Failed to create match context block\n");
#endif
abort();
}
@@ -195,18 +365,7 @@ for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
{
#ifdef STANDALONE
- printf("Match options %.8x", match_options);
- printf("%s%s%s%s%s%s%s%s%s%s\n",
- ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
- ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
- ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
- ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
- ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
- ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
- ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
- ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
- ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
- ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
+ print_match_options(stdout, match_options);
#endif
callout_count = 0;
@@ -216,9 +375,13 @@ for (i = 0; i < 2; i++)
#ifdef STANDALONE
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
{
+#if PCRE2_CODE_UNIT_WIDTH == 8
unsigned char buffer[256];
pcre2_get_error_message(errorcode, buffer, 256);
printf("Match failed: error %d: %s\n", errorcode, buffer);
+#else
+ printf("Match failed: error %d\n", errorcode);
+#endif
}
#endif
@@ -229,54 +392,52 @@ for (i = 0; i < 2; i++)
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
- if (errorcode_jit != errorcode)
- {
- printf("JIT errorcode %d did not match original errorcode %d\n", errorcode_jit, errorcode);
- abort();
- }
-
- ovector_count = pcre2_get_ovector_count(match_data);
+ matches = errorcode;
+ matches_jit = errorcode_jit;
- if (ovector_count != pcre2_get_ovector_count(match_data_jit))
+ if (errorcode_jit != errorcode)
{
- puts("JIT ovector count did not match original");
- abort();
+ if (!(errorcode < 0 && errorcode_jit < 0) &&
+ errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
+ errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
+ {
+ describe_failure("match errorcode comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context);
+ }
}
-
- for (uint32_t ovector = 0; ovector < ovector_count; ovector++)
+ else
{
- PCRE2_UCHAR *bufferptr, *bufferptr_jit;
- PCRE2_SIZE bufflen, bufflen_jit;
+ for (int index = 0; index < errorcode; index++)
+ {
+ PCRE2_UCHAR *bufferptr, *bufferptr_jit;
+ PCRE2_SIZE bufflen, bufflen_jit;
- bufferptr = bufferptr_jit = NULL;
- bufflen = bufflen_jit = 0;
+ bufferptr = bufferptr_jit = NULL;
+ bufflen = bufflen_jit = 0;
- errorcode = pcre2_substring_get_bynumber(match_data, ovector, &bufferptr, &bufflen);
- errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, ovector, &bufferptr_jit, &bufflen_jit);
+ errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
+ errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
- if (errorcode != errorcode_jit)
- {
- printf("when extracting substring, JIT errorcode %d did not match original %d\n", errorcode_jit, errorcode);
- abort();
- }
-
- if (errorcode >= 0)
- {
- if (bufflen != bufflen_jit)
+ if (errorcode != errorcode_jit)
{
- printf("when extracting substring, JIT buffer length %zu did not match original %zu\n", bufflen_jit, bufflen);
- abort();
+ describe_failure("match entry errorcode comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context);
}
- if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
+ if (errorcode >= 0)
{
- puts("when extracting substring, JIT buffer contents did not match original");
- abort();
+ if (bufflen != bufflen_jit)
+ {
+ describe_failure("match entry length comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context);
+ }
+
+ if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
+ {
+ describe_failure("match entry content comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context);
+ }
}
- }
- pcre2_substring_free(bufferptr);
- pcre2_substring_free(bufferptr_jit);
+ pcre2_substring_free(bufferptr);
+ pcre2_substring_free(bufferptr_jit);
+ }
}
}
#endif
@@ -312,9 +473,13 @@ for (i = 0; i < 2; i++)
#ifdef STANDALONE
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
{
+#if PCRE2_CODE_UNIT_WIDTH == 8
unsigned char buffer[256];
pcre2_get_error_message(errorcode, buffer, 256);
printf("Match failed: error %d: %s\n", errorcode, buffer);
+#else
+ printf("Match failed: error %d\n", errorcode);
+#endif
}
#endif
@@ -329,12 +494,17 @@ for (i = 0; i < 2; i++)
else
{
+#ifdef STANDALONE
+#if PCRE2_CODE_UNIT_WIDTH == 8
unsigned char buffer[256];
pcre2_get_error_message(errorcode, buffer, 256);
-#ifdef STANDALONE
printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
#else
- if (strstr((const char *)buffer, "internal error") != NULL) abort();
+ printf("Error %d at offset %lu\n", errorcode, erroroffset);
+#endif
+
+#else
+ if (errorcode == PCRE2_ERROR_INTERNAL) abort();
#endif
}
@@ -358,6 +528,8 @@ int main(int argc, char **argv)
{
int i;
+LLVMFuzzerInitialize(&argc, &argv);
+
if (argc < 2)
{
printf("** No arguments given\n");
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index fbb30a4a..050063ec 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -43,6 +43,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
#include "pcre2_internal.h"
#ifdef SUPPORT_JIT
@@ -9830,7 +9836,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
-static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
+static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR begin;
PCRE2_SIZE *ovector;
diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c
index 1663a1e6..ae5903e2 100644
--- a/src/pcre2_jit_match.c
+++ b/src/pcre2_jit_match.c
@@ -42,6 +42,12 @@ POSSIBILITY OF SUCH DAMAGE.
#error This file must be included from pcre2_jit_compile.c.
#endif
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
#ifdef SUPPORT_JIT
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
@@ -179,6 +185,13 @@ match_data->rightchar = 0;
match_data->mark = arguments.mark_ptr;
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+if (rc > 0)
+ __msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0]));
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
return match_data->rc;
#endif /* SUPPORT_JIT */
diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h
index d224248c..ce4e7b04 100644
--- a/src/sljit/sljitConfigInternal.h
+++ b/src/sljit/sljitConfigInternal.h
@@ -522,6 +522,19 @@ typedef double sljit_f64;
#define SLJIT_FUNC
#endif /* !SLJIT_FUNC */
+/* SLJIT_FUNC_ATTRIBUTE is an extra attribute for selected functions. Unless it
+   is already defined, it becomes __attribute__((no_sanitize("memory"))) when
+   the compiler reports the memory_sanitizer feature through __has_feature,
+   which disables MemorySanitizer instrumentation for the functions that carry
+   it, as that instrumentation may not be sound there. In every other case it
+   expands to nothing. */
+#ifndef SLJIT_FUNC_ATTRIBUTE
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define SLJIT_FUNC_ATTRIBUTE __attribute__((no_sanitize("memory")))
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+#endif
+
+/* Fallback: no attribute when MemorySanitizer is not in use. */
+#ifndef SLJIT_FUNC_ATTRIBUTE
+#define SLJIT_FUNC_ATTRIBUTE
+#endif
+
#ifndef SLJIT_INDIRECT_CALL
#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \
|| ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c
index 369d8285..c2c04213 100644
--- a/src/sljit/sljitNativeX86_common.c
+++ b/src/sljit/sljitNativeX86_common.c
@@ -24,6 +24,12 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
return "x86" SLJIT_CPUINFO;
@@ -484,6 +490,13 @@ static void execute_cpu_id(sljit_u32 info[4])
}
#endif /* _MSC_VER && _MSC_VER >= 1400 */
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+__msan_unpoison(info, 4 * sizeof(sljit_u32));
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
}
static void get_cpu_features(void)