about summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Wankadia <junyer@google.com> 2019-01-25 01:50:33 -0800
committer Paul Wankadia <junyer@google.com> 2019-01-25 09:51:52 +0000
commit a8176127eebcfef98db8ef9a22d1fa708b196e7c (patch)
tree 8ca468e856e7accf3db6f4cdee4acc7008d70907
parent b8e208581b53fb142b873a4900d0b7ef40539c69 (diff)
download regex-re2-a8176127eebcfef98db8ef9a22d1fa708b196e7c.tar.gz
Crudely limit the use of various character classes when fuzzing.
Change-Id: I383a5e3c2dd41c4d2cb58268cea7c65e10729850
Reviewed-on: https://code-review.googlesource.com/c/37770
Reviewed-by: Paul Wankadia <junyer@google.com>
-rw-r--r-- re2/fuzzing/re2_fuzzer.cc 21
1 files changed, 11 insertions, 10 deletions
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 084b5c0..83971a1 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -106,28 +106,29 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size == 0 || size > 999)
return 0;
- // Crudely limit the use of ., \p and \P.
- // Otherwise, we will waste time on inputs that have long runs of Unicode
+ // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
+ // Otherwise, we will waste time on inputs that have long runs of various
// character classes. The fuzzer has shown itself to be easily capable of
// generating such patterns that fall within the other limits, but result
// in timeouts nonetheless. The marginal cost is high - even more so when
// counted repetition is involved - whereas the marginal benefit is zero.
- int dot = 0;
- int backslash_p = 0;
+ // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
+ int cc = 0;
for (size_t i = 0; i < size; i++) {
if (data[i] == '.')
- dot++;
+ cc++;
if (data[i] != '\\')
continue;
i++;
if (i >= size)
break;
- if (data[i] == 'p' || data[i] == 'P')
- backslash_p++;
+ if (data[i] == 'p' || data[i] == 'P' ||
+ data[i] == 'd' || data[i] == 'D' ||
+ data[i] == 's' || data[i] == 'S' ||
+ data[i] == 'w' || data[i] == 'W')
+ cc++;
}
- if (dot > 99)
- return 0;
- if (backslash_p > 1)
+ if (cc > 9)
return 0;
// The one-at-a-time hash by Bob Jenkins.