diff options
author | Ulan Degenbaev <ulan@chromium.org> | 2014-12-23 14:13:48 +0100 |
---|---|---|
committer | Ulan Degenbaev <ulan@chromium.org> | 2014-12-23 13:14:10 +0000 |
commit | f4826bb31edc73bdd057d25b44ed6407c472a694 (patch) | |
tree | 94183693892eb992f72dda035414ca7d53524c07 | |
parent | e6716a300970387285a6620dc637ebd199563643 (diff) | |
download | v8-f4826bb31edc73bdd057d25b44ed6407c472a694.tar.gz |
Version 3.30.33.12 (cherry-pick)
Merged d287f225a3630e71a6d14009e7baf71f465be861
Limit code size generated for very large regexps
R=machenbach@chromium.org
BUG=chromium:440913
LOG=N
Review URL: https://codereview.chromium.org/825673002
Cr-Commit-Position: refs/branch-heads/3.30@{#25266}
-rw-r--r-- | src/jsregexp.cc | 46 | ||||
-rw-r--r-- | src/jsregexp.h | 5 | ||||
-rw-r--r-- | src/version.cc | 2 | ||||
-rw-r--r-- | test/cctest/test-heap.cc | 58 |
4 files changed, 88 insertions, 23 deletions
diff --git a/src/jsregexp.cc b/src/jsregexp.cc index d078d07d5..8111a127e 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -1027,6 +1027,8 @@ class RegExpCompiler { inline bool ignore_case() { return ignore_case_; } inline bool one_byte() { return one_byte_; } + inline bool optimize() { return optimize_; } + inline void set_optimize(bool value) { optimize_ = value; } FrequencyCollator* frequency_collator() { return &frequency_collator_; } int current_expansion_factor() { return current_expansion_factor_; } @@ -1047,6 +1049,7 @@ class RegExpCompiler { bool ignore_case_; bool one_byte_; bool reg_exp_too_big_; + bool optimize_; int current_expansion_factor_; FrequencyCollator frequency_collator_; Zone* zone_; @@ -1079,6 +1082,7 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, ignore_case_(ignore_case), one_byte_(one_byte), reg_exp_too_big_(false), + optimize_(FLAG_regexp_optimization), current_expansion_factor_(1), frequency_collator_(), zone_(zone) { @@ -1094,16 +1098,6 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble( Handle<String> pattern) { Heap* heap = pattern->GetHeap(); - bool use_slow_safe_regexp_compiler = false; - if (heap->total_regexp_code_generated() > - RegExpImpl::kRegWxpCompiledLimit && - heap->isolate()->memory_allocator()->SizeExecutable() > - RegExpImpl::kRegExpExecutableMemoryLimit) { - use_slow_safe_regexp_compiler = true; - } - - macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); - #ifdef DEBUG if (FLAG_trace_regexp_assembler) macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); @@ -2257,8 +2251,7 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, // We are being asked to make a non-generic version. Keep track of how many // non-generic versions we generate so as not to overdo it. 
trace_count_++; - if (FLAG_regexp_optimization && - trace_count_ < kMaxCopiesCodeGenerated && + if (compiler->optimize() && trace_count_ < kMaxCopiesCodeGenerated && compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { return CONTINUE; } @@ -4137,15 +4130,12 @@ void ChoiceNode::EmitChoices(RegExpCompiler* compiler, } alt_gen->expects_preload = preload->preload_is_current_; bool generate_full_check_inline = false; - if (FLAG_regexp_optimization && + if (compiler->optimize() && try_to_emit_quick_check_for_alternative(i == 0) && - alternative.node()->EmitQuickCheck(compiler, - trace, - &new_trace, - preload->preload_has_checked_bounds_, - &alt_gen->possible_success, - &alt_gen->quick_check_details, - fall_through_on_failure)) { + alternative.node()->EmitQuickCheck( + compiler, trace, &new_trace, preload->preload_has_checked_bounds_, + &alt_gen->possible_success, &alt_gen->quick_check_details, + fall_through_on_failure)) { // Quick check was generated for this choice. preload->preload_is_current_ = true; preload->preload_has_checked_bounds_ = true; @@ -4943,7 +4933,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, if (body_can_be_empty) { body_start_reg = compiler->AllocateRegister(); - } else if (FLAG_regexp_optimization && !needs_capture_clearing) { + } else if (compiler->optimize() && !needs_capture_clearing) { // Only unroll if there are no captures and the body can't be // empty. { @@ -6041,6 +6031,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( } RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone); + compiler.set_optimize(!TooMuchRegExpCode(pattern)); + // Sample some characters from the middle of the string. 
static const int kSampleSize = 128; @@ -6143,6 +6135,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( RegExpMacroAssemblerIrregexp macro_assembler(codes, zone); #endif // V8_INTERPRETED_REGEXP + macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern)); + // Inserted here, instead of in Assembler, because it depends on information // in the AST that isn't replicated in the Node structure. static const int kMaxBacksearchLimit = 1024; @@ -6166,4 +6160,14 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( } +bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { + Heap* heap = pattern->GetHeap(); + bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize; + if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit && + heap->isolate()->memory_allocator()->SizeExecutable() > + RegExpImpl::kRegExpExecutableMemoryLimit) { + too_much = true; + } + return too_much; +} }} // namespace v8::internal diff --git a/src/jsregexp.h b/src/jsregexp.h index c65adea4c..c81ccb21d 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -213,7 +213,8 @@ class RegExpImpl { // total regexp code compiled including code that has subsequently been freed // and the total executable memory at any point. 
static const int kRegExpExecutableMemoryLimit = 16 * MB; - static const int kRegWxpCompiledLimit = 1 * MB; + static const int kRegExpCompiledLimit = 1 * MB; + static const int kRegExpTooLargeToOptimize = 10 * KB; private: static bool CompileIrregexp(Handle<JSRegExp> re, @@ -1666,6 +1667,8 @@ class RegExpEngine: public AllStatic { Handle<String> sample_subject, bool is_one_byte, Zone* zone); + static bool TooMuchRegExpCode(Handle<String> pattern); + static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); }; diff --git a/src/version.cc b/src/version.cc index 057c7e4d1..63f7b6207 100644 --- a/src/version.cc +++ b/src/version.cc @@ -35,7 +35,7 @@ #define MAJOR_VERSION 3 #define MINOR_VERSION 30 #define BUILD_NUMBER 33 -#define PATCH_LEVEL 11 +#define PATCH_LEVEL 12 // Use 1 for candidates and 0 otherwise. // (Boolean macro values are not supported by all preprocessors.) #define IS_CANDIDATE_VERSION 0 diff --git a/test/cctest/test-heap.cc b/test/cctest/test-heap.cc index 0d43c0634..cc44c393b 100644 --- a/test/cctest/test-heap.cc +++ b/test/cctest/test-heap.cc @@ -1694,6 +1694,64 @@ TEST(TestInternalWeakListsTraverseWithGC) { } +TEST(TestSizeOfRegExpCode) { + if (!FLAG_regexp_optimization) return; + + v8::V8::Initialize(); + + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + LocalContext context; + + // Adjust source below and this check to match + // RegExpImpl::kRegExpTooLargeToOptimize. + DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 10 * KB); + + // Compile a regexp that is much larger if we are using regexp optimizations. + CompileRun( + "var reg_exp_source = '(?:a|bc|def|ghij|klmno|pqrstu)';" + "var half_size_reg_exp;" + "while (reg_exp_source.length < 10 * 1024) {" + " half_size_reg_exp = reg_exp_source;" + " reg_exp_source = reg_exp_source + reg_exp_source;" + "}" + // Flatten string. 
+ "reg_exp_source.match(/f/);"); + + // Get initial heap size after several full GCs, which will stabilize + // the heap size and return with sweeping finished completely. + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + MarkCompactCollector* collector = CcTest::heap()->mark_compact_collector(); + if (collector->sweeping_in_progress()) { + collector->EnsureSweepingCompleted(); + } + int initial_size = static_cast<int>(CcTest::heap()->SizeOfObjects()); + + CompileRun("'foo'.match(reg_exp_source);"); + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + int size_with_regexp = static_cast<int>(CcTest::heap()->SizeOfObjects()); + + CompileRun("'foo'.match(half_size_reg_exp);"); + CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags); + int size_with_optimized_regexp = + static_cast<int>(CcTest::heap()->SizeOfObjects()); + + int size_of_regexp_code = size_with_regexp - initial_size; + + CHECK_LE(size_of_regexp_code, 500 * KB); + + // Small regexp is half the size, but compiles to more than twice the code + // due to the optimization steps. + CHECK_GE(size_with_optimized_regexp, + size_with_regexp + size_of_regexp_code * 2); +} + + TEST(TestSizeOfObjects) { v8::V8::Initialize(); |