diff options
Diffstat (limited to 'src/share/vm/gc_implementation')
64 files changed, 2749 insertions, 1982 deletions
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp index 91d935708..de5555fb7 100644 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @@ -598,7 +598,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen, _collector_policy(cp), _should_unload_classes(CMSClassUnloadingEnabled), _concurrent_cycles_since_last_unload(0), - _roots_scanning_options(SharedHeap::SO_None), + _roots_scanning_options(GenCollectedHeap::SO_None), _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding), _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding), _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) CMSTracer()), @@ -3068,7 +3068,7 @@ void CMSCollector::verify_after_remark_work_1() { gch->gen_process_roots(_cmsGen->level(), true, // younger gens are roots true, // activate StrongRootsScope - SharedHeap::ScanningOption(roots_scanning_options()), + GenCollectedHeap::ScanningOption(roots_scanning_options()), should_unload_classes(), ¬Older, NULL, @@ -3136,7 +3136,7 @@ void CMSCollector::verify_after_remark_work_2() { gch->gen_process_roots(_cmsGen->level(), true, // younger gens are roots true, // activate StrongRootsScope - SharedHeap::ScanningOption(roots_scanning_options()), + GenCollectedHeap::ScanningOption(roots_scanning_options()), should_unload_classes(), ¬Older, NULL, @@ -3327,7 +3327,7 @@ bool ConcurrentMarkSweepGeneration::is_too_full() const { void CMSCollector::setup_cms_unloading_and_verification_state() { const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC || VerifyBeforeExit; - const int rso = SharedHeap::SO_AllCodeCache; + const int rso = GenCollectedHeap::SO_AllCodeCache; // We set the proper root for this CMS cycle here. if (should_unload_classes()) { // Should unload classes this cycle @@ -3339,9 +3339,11 @@ void CMSCollector::setup_cms_unloading_and_verification_state() { // Not unloading classes this cycle assert(!should_unload_classes(), "Inconsitency!"); + // If we are not unloading classes then add SO_AllCodeCache to root + // scanning options. + add_root_scanning_option(rso); + if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) { - // Include symbols, strings and code cache elements to prevent their resurrection. - add_root_scanning_option(rso); set_verifying(true); } else if (verifying() && !should_verify) { // We were verifying, but some verification flags got disabled. @@ -3753,7 +3755,7 @@ void CMSCollector::checkpointRootsInitialWork(bool asynch) { gch->gen_process_roots(_cmsGen->level(), true, // younger gens are roots true, // activate StrongRootsScope - SharedHeap::ScanningOption(roots_scanning_options()), + GenCollectedHeap::ScanningOption(roots_scanning_options()), should_unload_classes(), ¬Older, NULL, @@ -5254,13 +5256,13 @@ void CMSParInitialMarkTask::work(uint worker_id) { gch->gen_process_roots(_collector->_cmsGen->level(), false, // yg was scanned above false, // this is parallel code - SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), + GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), _collector->should_unload_classes(), &par_mri_cl, NULL, &cld_closure); assert(_collector->should_unload_classes() - || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache), + || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache), "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops"); _timer.stop(); if (PrintCMSStatistics != 0) { @@ -5390,14 +5392,14 @@ void CMSParRemarkTask::work(uint worker_id) { gch->gen_process_roots(_collector->_cmsGen->level(), false, // yg was scanned above false, // this is parallel code - SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), + GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), _collector->should_unload_classes(), &par_mrias_cl, NULL, NULL); // The dirty klasses will be handled below assert(_collector->should_unload_classes() - || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache), + || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache), "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops"); _timer.stop(); if (PrintCMSStatistics != 0) { @@ -5982,14 +5984,14 @@ void CMSCollector::do_remark_non_parallel() { gch->gen_process_roots(_cmsGen->level(), true, // younger gens as roots false, // use the local StrongRootsScope - SharedHeap::ScanningOption(roots_scanning_options()), + GenCollectedHeap::ScanningOption(roots_scanning_options()), should_unload_classes(), &mrias_cl, NULL, NULL); // The dirty klasses will be handled below assert(should_unload_classes() - || (roots_scanning_options() & SharedHeap::SO_AllCodeCache), + || (roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache), "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops"); } @@ -6634,7 +6636,6 @@ void CMSCollector::reset(bool asynch) { } void CMSCollector::do_CMS_operation(CMS_op_type op, GCCause::Cause gc_cause) { - gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); GCTraceTime t(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer_cm->gc_id()); TraceCollectorStats tcs(counters()); diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.hpp index 982f7c103..7dac0c2ae 100644 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.hpp +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -130,8 +130,8 @@ class VM_CMS_Final_Remark: public VM_CMS_Operation { class VM_GenCollectFullConcurrent: public VM_GC_Operation { bool _disabled_icms; public: - VM_GenCollectFullConcurrent(unsigned int gc_count_before, - unsigned int full_gc_count_before, + VM_GenCollectFullConcurrent(uint gc_count_before, + uint full_gc_count_before, GCCause::Cause gc_cause) : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true /* full */), _disabled_icms(false) diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp index 1a46ed3f5..2a0a563d5 100644 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,7 @@ nonstatic_field(LinearAllocBlock, _word_size, size_t) \ nonstatic_field(AFLBinaryTreeDictionary, _total_size, size_t) \ nonstatic_field(CompactibleFreeListSpace, _dictionary, AFLBinaryTreeDictionary*) \ - nonstatic_field(CompactibleFreeListSpace, _indexedFreeList[0], FreeList<FreeChunk>) \ + nonstatic_field(CompactibleFreeListSpace, _indexedFreeList[0], AdaptiveFreeList<FreeChunk>) \ nonstatic_field(CompactibleFreeListSpace, _smallLinearAllocBlock, LinearAllocBlock) diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp index 85b1ff5d4..92261912a 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp @@ -114,7 +114,7 @@ void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const { } size_t CMBitMap::compute_size(size_t heap_size) { - return heap_size / mark_distance(); + return ReservedSpace::allocation_align_size_up(heap_size / mark_distance()); } size_t CMBitMap::mark_distance() { @@ -2640,24 +2640,41 @@ void ConcurrentMark::swapMarkBitMaps() { _nextMarkBitMap = (CMBitMap*) temp; } -class CMObjectClosure; - -// Closure for iterating over objects, currently only used for -// processing SATB buffers. -class CMObjectClosure : public ObjectClosure { +// Closure for marking entries in SATB buffers. +class CMSATBBufferClosure : public SATBBufferClosure { private: CMTask* _task; + G1CollectedHeap* _g1h; -public: - void do_object(oop obj) { - _task->deal_with_reference(obj); + // This is very similar to CMTask::deal_with_reference, but with + // more relaxed requirements for the argument, so this must be more + // circumspect about treating the argument as an object. + void do_entry(void* entry) const { + _task->increment_refs_reached(); + HeapRegion* hr = _g1h->heap_region_containing_raw(entry); + if (entry < hr->next_top_at_mark_start()) { + // Until we get here, we don't know whether entry refers to a valid + // object; it could instead have been a stale reference. + oop obj = static_cast<oop>(entry); + assert(obj->is_oop(true /* ignore mark word */), + err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj))); + _task->make_reference_grey(obj, hr); + } } - CMObjectClosure(CMTask* task) : _task(task) { } +public: + CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h) + : _task(task), _g1h(g1h) { } + + virtual void do_buffer(void** buffer, size_t size) { + for (size_t i = 0; i < size; ++i) { + do_entry(buffer[i]); + } + } }; class G1RemarkThreadsClosure : public ThreadClosure { - CMObjectClosure _cm_obj; + CMSATBBufferClosure _cm_satb_cl; G1CMOopClosure _cm_cl; MarkingCodeBlobClosure _code_cl; int _thread_parity; @@ -2665,7 +2682,9 @@ class G1RemarkThreadsClosure : public ThreadClosure { public: G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) : - _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), + _cm_satb_cl(task, g1h), + _cm_cl(g1h, g1h->concurrent_mark(), task), + _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {} void do_thread(Thread* thread) { @@ -2681,11 +2700,11 @@ class G1RemarkThreadsClosure : public ThreadClosure { // live by the SATB invariant but other oops recorded in nmethods may behave differently. jt->nmethods_do(&_code_cl); - jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj); + jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); } } else if (thread->is_VM_thread()) { if (thread->claim_oops_do(_is_par, _thread_parity)) { - JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj); + JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); } } } @@ -3059,9 +3078,7 @@ ConcurrentMark::claim_region(uint worker_id) { #ifndef PRODUCT enum VerifyNoCSetOopsPhase { VerifyNoCSetOopsStack, - VerifyNoCSetOopsQueues, - VerifyNoCSetOopsSATBCompleted, - VerifyNoCSetOopsSATBThread + VerifyNoCSetOopsQueues }; class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { @@ -3074,8 +3091,6 @@ private: switch (_phase) { case VerifyNoCSetOopsStack: return "Stack"; case VerifyNoCSetOopsQueues: return "Queue"; - case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; - case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; default: ShouldNotReachHere(); } return NULL; @@ -3102,7 +3117,7 @@ public: virtual void do_oop(narrowOop* p) { // We should not come across narrow oops while scanning marking - // stacks and SATB buffers. + // stacks ShouldNotReachHere(); } @@ -3111,10 +3126,7 @@ public: } }; -void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, - bool verify_enqueued_buffers, - bool verify_thread_buffers, - bool verify_fingers) { +void ConcurrentMark::verify_no_cset_oops() { assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); if (!G1CollectedHeap::heap()->mark_in_progress()) { return; @@ -3122,65 +3134,47 @@ void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, VerifyNoCSetOopsClosure cl; - if (verify_stacks) { - // Verify entries on the global mark stack - cl.set_phase(VerifyNoCSetOopsStack); - _markStack.oops_do(&cl); + // Verify entries on the global mark stack + cl.set_phase(VerifyNoCSetOopsStack); + _markStack.oops_do(&cl); - // Verify entries on the task queues - for (uint i = 0; i < _max_worker_id; i += 1) { - cl.set_phase(VerifyNoCSetOopsQueues, i); - CMTaskQueue* queue = _task_queues->queue(i); - queue->oops_do(&cl); - } - } - - SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); - - // Verify entries on the enqueued SATB buffers - if (verify_enqueued_buffers) { - cl.set_phase(VerifyNoCSetOopsSATBCompleted); - satb_qs.iterate_completed_buffers_read_only(&cl); - } - - // Verify entries on the per-thread SATB buffers - if (verify_thread_buffers) { - cl.set_phase(VerifyNoCSetOopsSATBThread); - satb_qs.iterate_thread_buffers_read_only(&cl); - } - - if (verify_fingers) { - // Verify the global finger - HeapWord* global_finger = finger(); - if (global_finger != NULL && global_finger < _heap_end) { - // The global finger always points to a heap region boundary. We - // use heap_region_containing_raw() to get the containing region - // given that the global finger could be pointing to a free region - // which subsequently becomes continues humongous. If that - // happens, heap_region_containing() will return the bottom of the - // corresponding starts humongous region and the check below will - // not hold any more. - // Since we always iterate over all regions, we might get a NULL HeapRegion - // here. - HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); - guarantee(global_hr == NULL || global_finger == global_hr->bottom(), - err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, - p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); - } - - // Verify the task fingers - assert(parallel_marking_threads() <= _max_worker_id, "sanity"); - for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { - CMTask* task = _tasks[i]; - HeapWord* task_finger = task->finger(); - if (task_finger != NULL && task_finger < _heap_end) { - // See above note on the global finger verification. - HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); - guarantee(task_hr == NULL || task_finger == task_hr->bottom() || - !task_hr->in_collection_set(), - err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, - p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); - } + // Verify entries on the task queues + for (uint i = 0; i < _max_worker_id; i += 1) { + cl.set_phase(VerifyNoCSetOopsQueues, i); + CMTaskQueue* queue = _task_queues->queue(i); + queue->oops_do(&cl); + } + + // Verify the global finger + HeapWord* global_finger = finger(); + if (global_finger != NULL && global_finger < _heap_end) { + // The global finger always points to a heap region boundary. We + // use heap_region_containing_raw() to get the containing region + // given that the global finger could be pointing to a free region + // which subsequently becomes continues humongous. If that + // happens, heap_region_containing() will return the bottom of the + // corresponding starts humongous region and the check below will + // not hold any more. + // Since we always iterate over all regions, we might get a NULL HeapRegion + // here. + HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); + guarantee(global_hr == NULL || global_finger == global_hr->bottom(), + err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, + p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); + } + + // Verify the task fingers + assert(parallel_marking_threads() <= _max_worker_id, "sanity"); + for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { + CMTask* task = _tasks[i]; + HeapWord* task_finger = task->finger(); + if (task_finger != NULL && task_finger < _heap_end) { + // See above note on the global finger verification. + HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); + guarantee(task_hr == NULL || task_finger == task_hr->bottom() || + !task_hr->in_collection_set(), + err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, + p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); } } } @@ -3510,22 +3504,29 @@ void ConcurrentMark::print_finger() { } #endif -void CMTask::scan_object(oop obj) { +template<bool scan> +inline void CMTask::process_grey_object(oop obj) { + assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray"); assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, + gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT, _worker_id, p2i((void*) obj)); } size_t obj_size = obj->size(); _words_scanned += obj_size; - obj->oop_iterate(_cm_oop_closure); + if (scan) { + obj->oop_iterate(_cm_oop_closure); + } statsOnly( ++_objs_scanned ); check_limits(); } +template void CMTask::process_grey_object<true>(oop); +template void CMTask::process_grey_object<false>(oop); + // Closure for iteration over bitmaps class CMBitMapClosure : public BitMapClosure { private: @@ -3994,34 +3995,18 @@ void CMTask::drain_satb_buffers() { // very counter productive if it did that. :-) _draining_satb_buffers = true; - CMObjectClosure oc(this); + CMSATBBufferClosure satb_cl(this, _g1h); SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); - if (G1CollectedHeap::use_parallel_gc_threads()) { - satb_mq_set.set_par_closure(_worker_id, &oc); - } else { - satb_mq_set.set_closure(&oc); - } // This keeps claiming and applying the closure to completed buffers // until we run out of buffers or we need to abort. - if (G1CollectedHeap::use_parallel_gc_threads()) { - while (!has_aborted() && - satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { - if (_cm->verbose_medium()) { - gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); - } - statsOnly( ++_satb_buffers_processed ); - regular_clock_call(); - } - } else { - while (!has_aborted() && - satb_mq_set.apply_closure_to_completed_buffer()) { - if (_cm->verbose_medium()) { - gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); - } - statsOnly( ++_satb_buffers_processed ); - regular_clock_call(); + while (!has_aborted() && + satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { + if (_cm->verbose_medium()) { + gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); } + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); } _draining_satb_buffers = false; @@ -4030,12 +4015,6 @@ void CMTask::drain_satb_buffers() { concurrent() || satb_mq_set.completed_buffers_num() == 0, "invariant"); - if (G1CollectedHeap::use_parallel_gc_threads()) { - satb_mq_set.set_par_closure(_worker_id, NULL); - } else { - satb_mq_set.set_closure(NULL); - } - // again, this was a potentially expensive operation, decrease the // limits to get the regular clock call early decrease_limits(); diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.hpp index 8a1120e73..4c6262415 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp @@ -793,14 +793,9 @@ public: } // Verify that there are no CSet oops on the stacks (taskqueues / - // global mark stack), enqueued SATB buffers, per-thread SATB - // buffers, and fingers (global / per-task). The boolean parameters - // decide which of the above data structures to verify. If marking - // is not in progress, it's a no-op. - void verify_no_cset_oops(bool verify_stacks, - bool verify_enqueued_buffers, - bool verify_thread_buffers, - bool verify_fingers) PRODUCT_RETURN; + // global mark stack) and fingers (global / per-task). + // If marking is not in progress, it's a no-op. + void verify_no_cset_oops() PRODUCT_RETURN; bool isPrevMarked(oop p) const { assert(p != NULL && p->is_oop(), "expected an oop"); @@ -1108,6 +1103,12 @@ private: void regular_clock_call(); bool concurrent() { return _concurrent; } + // Test whether obj might have already been passed over by the + // mark bitmap scan, and so needs to be pushed onto the mark stack. + bool is_below_finger(oop obj, HeapWord* global_finger) const; + + template<bool scan> void process_grey_object(oop obj); + public: // It resets the task; it should be called right at the beginning of // a marking phase. @@ -1155,12 +1156,22 @@ public: void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure); - // It grays the object by marking it and, if necessary, pushing it - // on the local queue + // Increment the number of references this task has visited. + void increment_refs_reached() { ++_refs_reached; } + + // Grey the object by marking it. If not already marked, push it on + // the local queue if below the finger. + // Precondition: obj is in region. + // Precondition: obj is below region's NTAMS. + inline void make_reference_grey(oop obj, HeapRegion* region); + + // Grey the object (by calling make_grey_reference) if required, + // e.g. obj is below its containing region's NTAMS. + // Precondition: obj is a valid heap object. inline void deal_with_reference(oop obj); // It scans an object and visits its children. - void scan_object(oop obj); + void scan_object(oop obj) { process_grey_object<true>(obj); } // It pushes an object on the local queue. inline void push(oop obj); diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp index 0557cdbc6..4e84d49cd 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp @@ -259,14 +259,87 @@ inline void CMTask::push(oop obj) { ++_local_pushes ); } -// This determines whether the method below will check both the local -// and global fingers when determining whether to push on the stack a -// gray object (value 1) or whether it will only check the global one -// (value 0). The tradeoffs are that the former will be a bit more -// accurate and possibly push less on the stack, but it might also be -// a little bit slower. +inline bool CMTask::is_below_finger(oop obj, HeapWord* global_finger) const { + // If obj is above the global finger, then the mark bitmap scan + // will find it later, and no push is needed. Similarly, if we have + // a current region and obj is between the local finger and the + // end of the current region, then no push is needed. The tradeoff + // of checking both vs only checking the global finger is that the + // local check will be more accurate and so result in fewer pushes, + // but may also be a little slower. + HeapWord* objAddr = (HeapWord*)obj; + if (_finger != NULL) { + // We have a current region. + + // Finger and region values are all NULL or all non-NULL. We + // use _finger to check since we immediately use its value. + assert(_curr_region != NULL, "invariant"); + assert(_region_limit != NULL, "invariant"); + assert(_region_limit <= global_finger, "invariant"); + + // True if obj is less than the local finger, or is between + // the region limit and the global finger. + if (objAddr < _finger) { + return true; + } else if (objAddr < _region_limit) { + return false; + } // Else check global finger. + } + // Check global finger. + return objAddr < global_finger; +} + +inline void CMTask::make_reference_grey(oop obj, HeapRegion* hr) { + if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) { + + if (_cm->verbose_high()) { + gclog_or_tty->print_cr("[%u] marked object " PTR_FORMAT, + _worker_id, p2i(obj)); + } -#define _CHECK_BOTH_FINGERS_ 1 + // No OrderAccess:store_load() is needed. It is implicit in the + // CAS done in CMBitMap::parMark() call in the routine above. + HeapWord* global_finger = _cm->finger(); + + // We only need to push a newly grey object on the mark + // stack if it is in a section of memory the mark bitmap + // scan has already examined. Mark bitmap scanning + // maintains progress "fingers" for determining that. + // + // Notice that the global finger might be moving forward + // concurrently. This is not a problem. In the worst case, we + // mark the object while it is above the global finger and, by + // the time we read the global finger, it has moved forward + // past this object. In this case, the object will probably + // be visited when a task is scanning the region and will also + // be pushed on the stack. So, some duplicate work, but no + // correctness problems. + if (is_below_finger(obj, global_finger)) { + if (obj->is_typeArray()) { + // Immediately process arrays of primitive types, rather + // than pushing on the mark stack. This keeps us from + // adding humongous objects to the mark stack that might + // be reclaimed before the entry is processed - see + // selection of candidates for eager reclaim of humongous + // objects. The cost of the additional type test is + // mitigated by avoiding a trip through the mark stack, + // by only doing a bookkeeping update and avoiding the + // actual scan of the object - a typeArray contains no + // references, and the metadata is built-in. + process_grey_object<false>(obj); + } else { + if (_cm->verbose_high()) { + gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT + ", global: " PTR_FORMAT ") pushing " + PTR_FORMAT " on mark stack", + _worker_id, p2i(_finger), + p2i(global_finger), p2i(obj)); + } + push(obj); + } + } + } +} inline void CMTask::deal_with_reference(oop obj) { if (_cm->verbose_high()) { @@ -274,7 +347,7 @@ inline void CMTask::deal_with_reference(oop obj) { _worker_id, p2i((void*) obj)); } - ++_refs_reached; + increment_refs_reached(); HeapWord* objAddr = (HeapWord*) obj; assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); @@ -286,62 +359,7 @@ inline void CMTask::deal_with_reference(oop obj) { // anything with it). HeapRegion* hr = _g1h->heap_region_containing_raw(obj); if (!hr->obj_allocated_since_next_marking(obj)) { - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] "PTR_FORMAT" is not considered marked", - _worker_id, p2i((void*) obj)); - } - - // we need to mark it first - if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) { - // No OrderAccess:store_load() is needed. It is implicit in the - // CAS done in CMBitMap::parMark() call in the routine above. - HeapWord* global_finger = _cm->finger(); - -#if _CHECK_BOTH_FINGERS_ - // we will check both the local and global fingers - - if (_finger != NULL && objAddr < _finger) { - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] below the local finger ("PTR_FORMAT"), " - "pushing it", _worker_id, p2i(_finger)); - } - push(obj); - } else if (_curr_region != NULL && objAddr < _region_limit) { - // do nothing - } else if (objAddr < global_finger) { - // Notice that the global finger might be moving forward - // concurrently. This is not a problem. In the worst case, we - // mark the object while it is above the global finger and, by - // the time we read the global finger, it has moved forward - // passed this object. In this case, the object will probably - // be visited when a task is scanning the region and will also - // be pushed on the stack. So, some duplicate work, but no - // correctness problems. - - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] below the global finger " - "("PTR_FORMAT"), pushing it", - _worker_id, p2i(global_finger)); - } - push(obj); - } else { - // do nothing - } -#else // _CHECK_BOTH_FINGERS_ - // we will only check the global finger - - if (objAddr < global_finger) { - // see long comment above - - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] below the global finger " - "("PTR_FORMAT"), pushing it", - _worker_id, p2i(global_finger)); - } - push(obj); - } -#endif // _CHECK_BOTH_FINGERS_ - } + make_reference_grey(obj, hr); } } } diff --git a/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp b/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp index 9d2f06255..697a40275 100644 --- a/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp +++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp @@ -254,25 +254,23 @@ void MutatorAllocRegion::retire_region(HeapRegion* alloc_region, HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size, bool force) { assert(!force, "not supported for GC alloc regions"); - return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForSurvived); + return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young); } void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region, size_t allocated_bytes) { - _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, - GCAllocForSurvived); + _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Young); } HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size, bool force) { assert(!force, "not supported for GC alloc regions"); - return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForTenured); + return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old); } void OldGCAllocRegion::retire_region(HeapRegion* alloc_region, size_t allocated_bytes) { - _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, - GCAllocForTenured); + _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Old); } HeapRegion* OldGCAllocRegion::release() { diff --git a/src/share/vm/gc_implementation/g1/g1Allocator.cpp b/src/share/vm/gc_implementation/g1/g1Allocator.cpp index 2e223fcc1..0d1ab8411 100644 --- a/src/share/vm/gc_implementation/g1/g1Allocator.cpp +++ b/src/share/vm/gc_implementation/g1/g1Allocator.cpp @@ -59,7 +59,7 @@ void G1Allocator::reuse_retained_old_region(EvacuationInfo& evacuation_info, !(retained_region->top() == retained_region->end()) && !retained_region->is_empty() && !retained_region->isHumongous()) { - retained_region->record_top_and_timestamp(); + retained_region->record_timestamp(); // The retained region was added to the old region set when it was // retired. We have to remove it now, since we don't allow regions // we allocate to in the region sets. We'll re-add it later, when @@ -94,6 +94,9 @@ void G1DefaultAllocator::release_gc_alloc_regions(uint no_of_gc_workers, Evacuat // want either way so no reason to check explicitly for either // condition. _retained_old_gc_alloc_region = old_gc_alloc_region(context)->release(); + if (_retained_old_gc_alloc_region != NULL) { + _retained_old_gc_alloc_region->record_retained_region(); + } if (ResizePLAB) { _g1h->_survivor_plab_stats.adjust_desired_plab_sz(no_of_gc_workers); @@ -110,15 +113,16 @@ void G1DefaultAllocator::abandon_gc_alloc_regions() { G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) : ParGCAllocBuffer(gclab_word_size), _retired(true) { } -HeapWord* G1ParGCAllocator::allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) { - HeapWord* obj = NULL; - size_t gclab_word_size = _g1h->desired_plab_sz(purpose); +HeapWord* G1ParGCAllocator::allocate_direct_or_new_plab(InCSetState dest, + size_t word_sz, + AllocationContext_t context) { + size_t gclab_word_size = _g1h->desired_plab_sz(dest); if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) { - G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose, context); + G1ParGCAllocBuffer* alloc_buf = alloc_buffer(dest, context); add_to_alloc_buffer_waste(alloc_buf->words_remaining()); alloc_buf->retire(false /* end_of_gc */, false /* retain */); - HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size, context); + HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context); if (buf == NULL) { return NULL; // Let caller handle allocation failure. } @@ -126,30 +130,33 @@ HeapWord* G1ParGCAllocator::allocate_slow(GCAllocPurpose purpose, size_t word_sz alloc_buf->set_word_size(gclab_word_size); alloc_buf->set_buf(buf); - obj = alloc_buf->allocate(word_sz); + HeapWord* const obj = alloc_buf->allocate(word_sz); assert(obj != NULL, "buffer was definitely big enough..."); + return obj; } else { - obj = _g1h->par_allocate_during_gc(purpose, word_sz, context); + return _g1h->par_allocate_during_gc(dest, word_sz, context); } - return obj; } G1DefaultParGCAllocator::G1DefaultParGCAllocator(G1CollectedHeap* g1h) : - G1ParGCAllocator(g1h), - _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)), - _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)) { - - _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer; - _alloc_buffers[GCAllocForTenured] = &_tenured_alloc_buffer; - + G1ParGCAllocator(g1h), + _surviving_alloc_buffer(g1h->desired_plab_sz(InCSetState::Young)), + _tenured_alloc_buffer(g1h->desired_plab_sz(InCSetState::Old)) { + for (uint state = 0; state < InCSetState::Num; state++) { + _alloc_buffers[state] = NULL; + } + _alloc_buffers[InCSetState::Young] = &_surviving_alloc_buffer; + _alloc_buffers[InCSetState::Old] = &_tenured_alloc_buffer; } void G1DefaultParGCAllocator::retire_alloc_buffers() { - for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { - size_t waste = _alloc_buffers[ap]->words_remaining(); - add_to_alloc_buffer_waste(waste); - _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap), - true /* end_of_gc */, - false /* retain */); + for (uint state = 0; state < InCSetState::Num; state++) { + G1ParGCAllocBuffer* const buf = _alloc_buffers[state]; + if (buf != NULL) { + add_to_alloc_buffer_waste(buf->words_remaining()); + buf->flush_stats_and_retire(_g1h->alloc_buffer_stats(state), + true /* end_of_gc */, + false /* retain */); + } } } diff --git a/src/share/vm/gc_implementation/g1/g1Allocator.hpp b/src/share/vm/gc_implementation/g1/g1Allocator.hpp index 151342c47..b32a4191d 100644 --- a/src/share/vm/gc_implementation/g1/g1Allocator.hpp +++ b/src/share/vm/gc_implementation/g1/g1Allocator.hpp @@ -27,14 +27,9 @@ #include "gc_implementation/g1/g1AllocationContext.hpp" #include "gc_implementation/g1/g1AllocRegion.hpp" +#include "gc_implementation/g1/g1InCSetState.hpp" #include "gc_implementation/shared/parGCAllocBuffer.hpp" -enum GCAllocPurpose { - GCAllocForTenured, - GCAllocForSurvived, - GCAllocPurposeCount -}; - // Base class for G1 allocators. class G1Allocator : public CHeapObj<mtGC> { friend class VMStructs; @@ -178,20 +173,40 @@ class G1ParGCAllocator : public CHeapObj<mtGC> { protected: G1CollectedHeap* _g1h; + // The survivor alignment in effect in bytes. + // == 0 : don't align survivors + // != 0 : align survivors to that alignment + // These values were chosen to favor the non-alignment case since some + // architectures have a special compare against zero instructions. + const uint _survivor_alignment_bytes; + size_t _alloc_buffer_waste; size_t _undo_waste; void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } void add_to_undo_waste(size_t waste) { _undo_waste += waste; } - HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context); - virtual void retire_alloc_buffers() = 0; - virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) = 0; + virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0; + + // Calculate the survivor space object alignment in bytes. Returns that or 0 if + // there are no restrictions on survivor alignment. + static uint calc_survivor_alignment_bytes() { + assert(SurvivorAlignmentInBytes >= ObjectAlignmentInBytes, "sanity"); + if (SurvivorAlignmentInBytes == ObjectAlignmentInBytes) { + // No need to align objects in the survivors differently, return 0 + // which means "survivor alignment is not used". + return 0; + } else { + assert(SurvivorAlignmentInBytes > 0, "sanity"); + return SurvivorAlignmentInBytes; + } + } public: G1ParGCAllocator(G1CollectedHeap* g1h) : - _g1h(g1h), _alloc_buffer_waste(0), _undo_waste(0) { + _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()), + _alloc_buffer_waste(0), _undo_waste(0) { } static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h); @@ -199,24 +214,40 @@ public: size_t alloc_buffer_waste() { return _alloc_buffer_waste; } size_t undo_waste() {return _undo_waste; } - HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) { - HeapWord* obj = NULL; - if (purpose == GCAllocForSurvived) { - obj = alloc_buffer(purpose, context)->allocate_aligned(word_sz, SurvivorAlignmentInBytes); + // Allocate word_sz words in dest, either directly into the regions or by + // allocating a new PLAB. Returns the address of the allocated memory, NULL if + // not successful. + HeapWord* allocate_direct_or_new_plab(InCSetState dest, + size_t word_sz, + AllocationContext_t context); + + // Allocate word_sz words in the PLAB of dest. Returns the address of the + // allocated memory, NULL if not successful. + HeapWord* plab_allocate(InCSetState dest, + size_t word_sz, + AllocationContext_t context) { + G1ParGCAllocBuffer* buffer = alloc_buffer(dest, context); + if (_survivor_alignment_bytes == 0) { + return buffer->allocate(word_sz); } else { - obj = alloc_buffer(purpose, context)->allocate(word_sz); + return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes); } + } + + HeapWord* allocate(InCSetState dest, size_t word_sz, + AllocationContext_t context) { + HeapWord* const obj = plab_allocate(dest, word_sz, context); if (obj != NULL) { return obj; } - return allocate_slow(purpose, word_sz, context); + return allocate_direct_or_new_plab(dest, word_sz, context); } - void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz, AllocationContext_t context) { - if (alloc_buffer(purpose, context)->contains(obj)) { - assert(alloc_buffer(purpose, context)->contains(obj + word_sz - 1), + void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) { + if (alloc_buffer(dest, context)->contains(obj)) { + assert(alloc_buffer(dest, context)->contains(obj + word_sz - 1), "should contain whole object"); - alloc_buffer(purpose, context)->undo_allocation(obj, word_sz); + alloc_buffer(dest, context)->undo_allocation(obj, word_sz); } else { CollectedHeap::fill_with_object(obj, word_sz); add_to_undo_waste(word_sz); @@ -227,13 +258,17 @@ public: class G1DefaultParGCAllocator : public G1ParGCAllocator { G1ParGCAllocBuffer _surviving_alloc_buffer; G1ParGCAllocBuffer _tenured_alloc_buffer; - G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; + G1ParGCAllocBuffer* _alloc_buffers[InCSetState::Num]; public: G1DefaultParGCAllocator(G1CollectedHeap* g1h); - virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) { - return _alloc_buffers[purpose]; + virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) { + assert(dest.is_valid(), + err_msg("Allocation buffer index out-of-bounds: " CSETSTATE_FORMAT, dest.value())); + assert(_alloc_buffers[dest.value()] != NULL, + err_msg("Allocation buffer is NULL: " CSETSTATE_FORMAT, dest.value())); + return _alloc_buffers[dest.value()]; } virtual void retire_alloc_buffers() ; diff --git a/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp b/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp index 955e0487e..88a673574 100644 --- a/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp +++ b/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp @@ -75,7 +75,7 @@ protected: assert((uintptr_t)end % mapping_granularity_in_bytes == 0, err_msg("end mapping area address must be a multiple of mapping granularity %zd, is " PTR_FORMAT, mapping_granularity_in_bytes, p2i(end))); - size_t num_target_elems = (end - bottom) / (mapping_granularity_in_bytes / HeapWordSize); + size_t num_target_elems = pointer_delta(end, bottom, mapping_granularity_in_bytes); idx_t bias = (uintptr_t)bottom / mapping_granularity_in_bytes; address base = create_new_base_array(num_target_elems, target_elem_size_in_bytes); initialize_base(base, num_target_elems, bias, target_elem_size_in_bytes, log2_intptr(mapping_granularity_in_bytes)); diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index b7490f6b1..f5212f059 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,6 +46,7 @@ #include "gc_implementation/g1/g1ParScanThreadState.inline.hpp" #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp" #include "gc_implementation/g1/g1RemSet.inline.hpp" +#include "gc_implementation/g1/g1RootProcessor.hpp" #include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/g1/g1YCTypes.hpp" #include "gc_implementation/g1/heapRegion.inline.hpp" @@ -85,18 +86,6 @@ size_t G1CollectedHeap::_humongous_object_threshold_in_words = 0; // apply to TLAB allocation, which is not part of this interface: it // is done by clients of this interface.) -// Notes on implementation of parallelism in different tasks. -// -// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism. -// The number of GC workers is passed to heap_region_par_iterate_chunked(). -// It does use run_task() which sets _n_workers in the task. -// G1ParTask executes g1_process_roots() -> -// SharedHeap::process_roots() which calls eventually to -// CardTableModRefBS::par_non_clean_card_iterate_work() which uses -// SequentialSubTasksDone. SharedHeap::process_roots() also -// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap). -// - // Local to this file. class RefineCardTableEntryClosure: public CardTableEntryClosure { @@ -364,7 +353,7 @@ void YoungList::print() { HeapRegion* lists[] = {_head, _survivor_head}; const char* names[] = {"YOUNG", "SURVIVOR"}; - for (unsigned int list = 0; list < ARRAY_SIZE(lists); ++list) { + for (uint list = 0; list < ARRAY_SIZE(lists); ++list) { gclog_or_tty->print_cr("%s LIST CONTENTS", names[list]); HeapRegion *curr = lists[list]; if (curr == NULL) @@ -838,8 +827,8 @@ HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) { assert_heap_not_locked_and_not_at_safepoint(); assert(!isHumongous(word_size), "we do not allow humongous TLABs"); - unsigned int dummy_gc_count_before; - int dummy_gclocker_retry_count = 0; + uint dummy_gc_count_before; + uint dummy_gclocker_retry_count = 0; return attempt_allocation(word_size, &dummy_gc_count_before, &dummy_gclocker_retry_count); } @@ -849,8 +838,8 @@ G1CollectedHeap::mem_allocate(size_t word_size, assert_heap_not_locked_and_not_at_safepoint(); // Loop until the allocation is satisfied, or unsatisfied after GC. - for (int try_count = 1, gclocker_retry_count = 0; /* we'll return */; try_count += 1) { - unsigned int gc_count_before; + for (uint try_count = 1, gclocker_retry_count = 0; /* we'll return */; try_count += 1) { + uint gc_count_before; HeapWord* result = NULL; if (!isHumongous(word_size)) { @@ -902,8 +891,8 @@ G1CollectedHeap::mem_allocate(size_t word_size, HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size, AllocationContext_t context, - unsigned int *gc_count_before_ret, - int* gclocker_retry_count_ret) { + uint* gc_count_before_ret, + uint* gclocker_retry_count_ret) { // Make sure you read the note in attempt_allocation_humongous(). assert_heap_not_locked_and_not_at_safepoint(); @@ -920,7 +909,7 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size, HeapWord* result = NULL; for (int try_count = 1; /* we'll return */; try_count += 1) { bool should_try_gc; - unsigned int gc_count_before; + uint gc_count_before; { MutexLockerEx x(Heap_lock); @@ -964,7 +953,7 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size, if (should_try_gc) { bool succeeded; result = do_collection_pause(word_size, gc_count_before, &succeeded, - GCCause::_g1_inc_collection_pause); + GCCause::_g1_inc_collection_pause); if (result != NULL) { assert(succeeded, "only way to get back a non-NULL result"); return result; @@ -1018,8 +1007,8 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size, } HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size, - unsigned int * gc_count_before_ret, - int* gclocker_retry_count_ret) { + uint* gc_count_before_ret, + uint* gclocker_retry_count_ret) { // The structure of this method has a lot of similarities to // attempt_allocation_slow(). The reason these two were not merged // into a single one is that such a method would require several "if @@ -1052,7 +1041,7 @@ HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size, HeapWord* result = NULL; for (int try_count = 1; /* we'll return */; try_count += 1) { bool should_try_gc; - unsigned int gc_count_before; + uint gc_count_before; { MutexLockerEx x(Heap_lock); @@ -1090,7 +1079,7 @@ HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size, bool succeeded; result = do_collection_pause(word_size, gc_count_before, &succeeded, - GCCause::_g1_humongous_allocation); + GCCause::_g1_humongous_allocation); if (result != NULL) { assert(succeeded, "only way to get back a non-NULL result"); return result; @@ -1297,7 +1286,6 @@ bool G1CollectedHeap::do_collection(bool explicit_gc, // Timing assert(gc_cause() != GCCause::_java_lang_system_gc || explicit_gc, "invariant"); - gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps); TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty); { @@ -1855,7 +1843,6 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : _is_alive_closure_stw(this), _ref_processor_cm(NULL), _ref_processor_stw(NULL), - _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), _bot_shared(NULL), _evac_failure_scan_stack(NULL), _mark_in_progress(false), @@ -1866,7 +1853,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()), _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()), _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()), - _humongous_is_live(), + _humongous_reclaim_candidates(), _has_humongous_reclaim_candidates(false), _free_regions_coming(false), _young_list(new YoungList(this)), @@ -1878,6 +1865,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : _old_marking_cycles_started(0), _old_marking_cycles_completed(0), _concurrent_cycle_started(false), + _heap_summary_sent(false), _in_cset_fast_test(), _dirty_cards_region_list(NULL), _worker_cset_start_region(NULL), @@ -1888,9 +1876,6 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()) { _g1h = this; - if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { - vm_exit_during_initialization("Failed necessary allocation."); - } _allocator = G1Allocator::create_allocator(_g1h); _humongous_object_threshold_in_words = HeapRegion::GrainWords / 2; @@ -1902,7 +1887,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : assert(n_rem_sets > 0, "Invariant."); _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues, mtGC); - _worker_cset_start_region_time_stamp = NEW_C_HEAP_ARRAY(unsigned int, n_queues, mtGC); + _worker_cset_start_region_time_stamp = NEW_C_HEAP_ARRAY(uint, n_queues, mtGC); _evacuation_failed_info_array = NEW_C_HEAP_ARRAY(EvacuationFailedInfo, n_queues, mtGC); for (int i = 0; i < n_queues; i++) { @@ -1919,6 +1904,26 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : guarantee(_task_queues != NULL, "task_queues allocation failure."); } +G1RegionToSpaceMapper* G1CollectedHeap::create_aux_memory_mapper(const char* description, + size_t size, + size_t translation_factor) { + size_t preferred_page_size = os::page_size_for_region_unaligned(size, 1); + // Allocate a new reserved space, preferring to use large pages. + ReservedSpace rs(size, preferred_page_size); + G1RegionToSpaceMapper* result = + G1RegionToSpaceMapper::create_mapper(rs, + size, + rs.alignment(), + HeapRegion::GrainBytes, + translation_factor, + mtGC); + if (TracePageSizes) { + gclog_or_tty->print_cr("G1 '%s': pg_sz=" SIZE_FORMAT " base=" PTR_FORMAT " size=" SIZE_FORMAT " alignment=" SIZE_FORMAT " reqsize=" SIZE_FORMAT, + description, preferred_page_size, p2i(rs.base()), rs.size(), rs.alignment(), size); + } + return result; +} + jint G1CollectedHeap::initialize() { CollectedHeap::pre_initialize(); os::enable_vtime(); @@ -1992,57 +1997,35 @@ jint G1CollectedHeap::initialize() { ReservedSpace g1_rs = heap_rs.first_part(max_byte_size); G1RegionToSpaceMapper* heap_storage = G1RegionToSpaceMapper::create_mapper(g1_rs, + g1_rs.size(), UseLargePages ? os::large_page_size() : os::vm_page_size(), HeapRegion::GrainBytes, 1, mtJavaHeap); heap_storage->set_mapping_changed_listener(&_listener); - // Reserve space for the block offset table. We do not support automatic uncommit - // for the card table at this time. BOT only. - ReservedSpace bot_rs(G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize)); + // Create storage for the BOT, card table, card counts table (hot card cache) and the bitmaps. G1RegionToSpaceMapper* bot_storage = - G1RegionToSpaceMapper::create_mapper(bot_rs, - os::vm_page_size(), - HeapRegion::GrainBytes, - G1BlockOffsetSharedArray::N_bytes, - mtGC); + create_aux_memory_mapper("Block offset table", + G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize), + G1BlockOffsetSharedArray::N_bytes); ReservedSpace cardtable_rs(G1SATBCardTableLoggingModRefBS::compute_size(g1_rs.size() / HeapWordSize)); G1RegionToSpaceMapper* cardtable_storage = - G1RegionToSpaceMapper::create_mapper(cardtable_rs, - os::vm_page_size(), - HeapRegion::GrainBytes, - G1BlockOffsetSharedArray::N_bytes, - mtGC); + create_aux_memory_mapper("Card table", + G1SATBCardTableLoggingModRefBS::compute_size(g1_rs.size() / HeapWordSize), + G1BlockOffsetSharedArray::N_bytes); - // Reserve space for the card counts table. - ReservedSpace card_counts_rs(G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize)); G1RegionToSpaceMapper* card_counts_storage = - G1RegionToSpaceMapper::create_mapper(card_counts_rs, - os::vm_page_size(), - HeapRegion::GrainBytes, - G1BlockOffsetSharedArray::N_bytes, - mtGC); + create_aux_memory_mapper("Card counts table", + G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize), + G1BlockOffsetSharedArray::N_bytes); - // Reserve space for prev and next bitmap. size_t bitmap_size = CMBitMap::compute_size(g1_rs.size()); - - ReservedSpace prev_bitmap_rs(ReservedSpace::allocation_align_size_up(bitmap_size)); G1RegionToSpaceMapper* prev_bitmap_storage = - G1RegionToSpaceMapper::create_mapper(prev_bitmap_rs, - os::vm_page_size(), - HeapRegion::GrainBytes, - CMBitMap::mark_distance(), - mtGC); - - ReservedSpace next_bitmap_rs(ReservedSpace::allocation_align_size_up(bitmap_size)); + create_aux_memory_mapper("Prev Bitmap", bitmap_size, CMBitMap::mark_distance()); G1RegionToSpaceMapper* next_bitmap_storage = - G1RegionToSpaceMapper::create_mapper(next_bitmap_rs, - os::vm_page_size(), - HeapRegion::GrainBytes, - CMBitMap::mark_distance(), - mtGC); + create_aux_memory_mapper("Next Bitmap", bitmap_size, CMBitMap::mark_distance()); _hrm.initialize(heap_storage, prev_bitmap_storage, next_bitmap_storage, bot_storage, cardtable_storage, card_counts_storage); g1_barrier_set()->initialize(cardtable_storage); @@ -2065,8 +2048,14 @@ jint G1CollectedHeap::initialize() { _g1h = this; - _in_cset_fast_test.initialize(_hrm.reserved().start(), _hrm.reserved().end(), HeapRegion::GrainBytes); - _humongous_is_live.initialize(_hrm.reserved().start(), _hrm.reserved().end(), HeapRegion::GrainBytes); + { + HeapWord* start = _hrm.reserved().start(); + HeapWord* end = _hrm.reserved().end(); + size_t granularity = HeapRegion::GrainBytes; + + _in_cset_fast_test.initialize(start, end, granularity); + _humongous_reclaim_candidates.initialize(start, end, granularity); + } // Create the ConcurrentMark data structure and thread. // (Must do this late, so that "max_regions" is defined.) @@ -2158,11 +2147,6 @@ void G1CollectedHeap::stop() { } } -void G1CollectedHeap::clear_humongous_is_live_table() { - guarantee(G1ReclaimDeadHumongousObjectsAtYoungGC, "Should only be called if true"); - _humongous_is_live.clear(); -} - size_t G1CollectedHeap::conservative_max_heap_alignment() { return HeapRegion::max_region_size(); } @@ -2299,11 +2283,11 @@ void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl, hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - int n_completed_buffers = 0; + size_t n_completed_buffers = 0; while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) { n_completed_buffers++; } - g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers); + g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers); dcqs.clear_n_completed_buffers(); assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); } @@ -2347,6 +2331,7 @@ bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) { case GCCause::_java_lang_system_gc: return ExplicitGCInvokesConcurrent; case GCCause::_g1_humongous_allocation: return true; case GCCause::_update_allocation_context_stats_inc: return true; + case GCCause::_wb_conc_mark: return true; default: return false; } } @@ -2448,13 +2433,24 @@ void G1CollectedHeap::register_concurrent_cycle_end() { _gc_timer_cm->register_gc_end(); _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions()); + // Clear state variables to prepare for the next concurrent cycle. _concurrent_cycle_started = false; + _heap_summary_sent = false; } } void G1CollectedHeap::trace_heap_after_concurrent_cycle() { if (_concurrent_cycle_started) { - trace_heap_after_gc(_gc_tracer_cm); + // This function can be called when: + // the cleanup pause is run + // the concurrent cycle is aborted before the cleanup pause. + // the concurrent cycle is aborted after the cleanup pause, + // but before the concurrent cycle end has been registered. + // Make sure that we only send the heap information once. + if (!_heap_summary_sent) { + trace_heap_after_gc(_gc_tracer_cm); + _heap_summary_sent = true; + } } } @@ -2477,9 +2473,9 @@ G1YCType G1CollectedHeap::yc_type() { void G1CollectedHeap::collect(GCCause::Cause cause) { assert_heap_not_locked(); - unsigned int gc_count_before; - unsigned int old_marking_count_before; - unsigned int full_gc_count_before; + uint gc_count_before; + uint old_marking_count_before; + uint full_gc_count_before; bool retry_gc; do { @@ -3292,11 +3288,12 @@ void G1CollectedHeap::verify(bool silent, VerifyOption vo) { G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo); G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl); - process_all_roots(true, // activate StrongRootsScope - SO_AllCodeCache, // roots scanning options - &rootsCl, - &cldCl, - &blobsCl); + { + G1RootProcessor root_processor(this); + root_processor.process_all_roots(&rootsCl, + &cldCl, + &blobsCl); + } bool failures = rootsCl.failures() || codeRootsCl.failures(); @@ -3616,7 +3613,7 @@ void G1CollectedHeap::gc_epilogue(bool full) { } HeapWord* G1CollectedHeap::do_collection_pause(size_t word_size, - unsigned int gc_count_before, + uint gc_count_before, bool* succeeded, GCCause::Cause gc_cause) { assert_heap_not_locked_and_not_at_safepoint(); @@ -3671,18 +3668,73 @@ size_t G1CollectedHeap::cards_scanned() { return g1_rem_set()->cardsScanned(); } -bool G1CollectedHeap::humongous_region_is_always_live(uint index) { - HeapRegion* region = region_at(index); - assert(region->startsHumongous(), "Must start a humongous object"); - return oop(region->bottom())->is_objArray() || !region->rem_set()->is_empty(); -} - class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { private: size_t _total_humongous; size_t _candidate_humongous; + + DirtyCardQueue _dcq; + + // We don't nominate objects with many remembered set entries, on + // the assumption that such objects are likely still live. + bool is_remset_small(HeapRegion* region) const { + HeapRegionRemSet* const rset = region->rem_set(); + return G1EagerReclaimHumongousObjectsWithStaleRefs + ? rset->occupancy_less_or_equal_than(G1RSetSparseRegionEntries) + : rset->is_empty(); + } + + bool is_typeArray_region(HeapRegion* region) const { + return oop(region->bottom())->is_typeArray(); + } + + bool humongous_region_is_candidate(G1CollectedHeap* heap, HeapRegion* region) const { + assert(region->startsHumongous(), "Must start a humongous object"); + + // Candidate selection must satisfy the following constraints + // while concurrent marking is in progress: + // + // * In order to maintain SATB invariants, an object must not be + // reclaimed if it was allocated before the start of marking and + // has not had its references scanned. Such an object must have + // its references (including type metadata) scanned to ensure no + // live objects are missed by the marking process. Objects + // allocated after the start of concurrent marking don't need to + // be scanned. + // + // * An object must not be reclaimed if it is on the concurrent + // mark stack. Objects allocated after the start of concurrent + // marking are never pushed on the mark stack. + // + // Nominating only objects allocated after the start of concurrent + // marking is sufficient to meet both constraints. This may miss + // some objects that satisfy the constraints, but the marking data + // structures don't support efficiently performing the needed + // additional tests or scrubbing of the mark stack. + // + // However, we presently only nominate is_typeArray() objects. + // A humongous object containing references induces remembered + // set entries on other regions. In order to reclaim such an + // object, those remembered sets would need to be cleaned up. + // + // We also treat is_typeArray() objects specially, allowing them + // to be reclaimed even if allocated before the start of + // concurrent mark. For this we rely on mark stack insertion to + // exclude is_typeArray() objects, preventing reclaiming an object + // that is in the mark stack. We also rely on the metadata for + // such objects to be built-in and so ensured to be kept live. + // Frequent allocation and drop of large binary blobs is an + // important use case for eager reclaim, and this special handling + // may reduce needed headroom. + + return is_typeArray_region(region) && is_remset_small(region); + } + public: - RegisterHumongousWithInCSetFastTestClosure() : _total_humongous(0), _candidate_humongous(0) { + RegisterHumongousWithInCSetFastTestClosure() + : _total_humongous(0), + _candidate_humongous(0), + _dcq(&JavaThread::dirty_card_queue_set()) { } virtual bool doHeapRegion(HeapRegion* r) { @@ -3691,14 +3743,33 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { } G1CollectedHeap* g1h = G1CollectedHeap::heap(); - uint region_idx = r->hrm_index(); - bool is_candidate = !g1h->humongous_region_is_always_live(region_idx); - // Is_candidate already filters out humongous regions with some remembered set. - // This will not lead to humongous object that we mistakenly keep alive because - // during young collection the remembered sets will only be added to. + bool is_candidate = humongous_region_is_candidate(g1h, r); + uint rindex = r->hrm_index(); + g1h->set_humongous_reclaim_candidate(rindex, is_candidate); if (is_candidate) { - g1h->register_humongous_region_with_in_cset_fast_test(region_idx); _candidate_humongous++; + g1h->register_humongous_region_with_in_cset_fast_test(rindex); + // Is_candidate already filters out humongous object with large remembered sets. + // If we have a humongous object with a few remembered sets, we simply flush these + // remembered set entries into the DCQS. That will result in automatic + // re-evaluation of their remembered set entries during the following evacuation + // phase. + if (!r->rem_set()->is_empty()) { + guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries), + "Found a not-small remembered set here. This is inconsistent with previous assumptions."); + G1SATBCardTableLoggingModRefBS* bs = g1h->g1_barrier_set(); + HeapRegionRemSetIterator hrrs(r->rem_set()); + size_t card_index; + while (hrrs.has_next(card_index)) { + jbyte* card_ptr = (jbyte*)bs->byte_for_index(card_index); + if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + _dcq.enqueue(card_ptr); + } + } + r->rem_set()->clear_locked(); + } + assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); } _total_humongous++; @@ -3707,23 +3778,29 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { size_t total_humongous() const { return _total_humongous; } size_t candidate_humongous() const { return _candidate_humongous; } + + void flush_rem_set_entries() { _dcq.flush(); } }; void G1CollectedHeap::register_humongous_regions_with_in_cset_fast_test() { - if (!G1ReclaimDeadHumongousObjectsAtYoungGC) { - g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(0, 0); + if (!G1EagerReclaimHumongousObjects) { + g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(0.0, 0, 0); return; } + double time = os::elapsed_counter(); + // Collect reclaim candidate information and register candidates with cset. RegisterHumongousWithInCSetFastTestClosure cl; heap_region_iterate(&cl); - g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(cl.total_humongous(), + + time = ((double)(os::elapsed_counter() - time) / os::elapsed_frequency()) * 1000.0; + g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(time, + cl.total_humongous(), cl.candidate_humongous()); _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0; - if (_has_humongous_reclaim_candidates) { - clear_humongous_is_live_table(); - } + // Finally flush all remembered set entries to re-check into the global DCQS. + cl.flush_rem_set_entries(); } void @@ -3901,10 +3978,10 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty); - int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ? workers()->active_workers() : 1); double pause_start_sec = os::elapsedTime(); - g1_policy()->phase_times()->note_gc_start(active_workers); + g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress()); log_gc_header(); TraceCollectorStats tcs(g1mm()->incremental_collection_counters()); @@ -4014,15 +4091,12 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { register_humongous_regions_with_in_cset_fast_test(); + assert(check_cset_fast_test(), "Inconsistency in the InCSetState table."); + _cm->note_start_of_gc(); - // We should not verify the per-thread SATB buffers given that - // we have not filtered them yet (we'll do so during the - // GC). We also call this after finalize_cset() to + // We call this after finalize_cset() to // ensure that the CSet has been finalized. - _cm->verify_no_cset_oops(true /* verify_stacks */, - true /* verify_enqueued_buffers */, - false /* verify_thread_buffers */, - true /* verify_fingers */); + _cm->verify_no_cset_oops(); if (_hr_printer.is_active()) { HeapRegion* hr = g1_policy()->collection_set(); @@ -4045,16 +4119,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { // Actually do the work... evacuate_collection_set(evacuation_info); - // We do this to mainly verify the per-thread SATB buffers - // (which have been filtered by now) since we didn't verify - // them earlier. No point in re-checking the stacks / enqueued - // buffers given that the CSet has not changed since last time - // we checked. - _cm->verify_no_cset_oops(false /* verify_stacks */, - false /* verify_enqueued_buffers */, - true /* verify_thread_buffers */, - true /* verify_fingers */); - free_collection_set(g1_policy()->collection_set(), evacuation_info); eagerly_reclaim_humongous_regions(); @@ -4137,10 +4201,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { // We redo the verification but now wrt to the new CSet which // has just got initialized after the previous CSet was freed. - _cm->verify_no_cset_oops(true /* verify_stacks */, - true /* verify_enqueued_buffers */, - true /* verify_thread_buffers */, - true /* verify_fingers */); + _cm->verify_no_cset_oops(); _cm->note_end_of_gc(); // This timing is only used by the ergonomics to handle our pause target. @@ -4243,29 +4304,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { return true; } -size_t G1CollectedHeap::desired_plab_sz(GCAllocPurpose purpose) -{ - size_t gclab_word_size; - switch (purpose) { - case GCAllocForSurvived: - gclab_word_size = _survivor_plab_stats.desired_plab_sz(); - break; - case GCAllocForTenured: - gclab_word_size = _old_plab_stats.desired_plab_sz(); - break; - default: - assert(false, "unknown GCAllocPurpose"); - gclab_word_size = _old_plab_stats.desired_plab_sz(); - break; - } - - // Prevent humongous PLAB sizes for two reasons: - // * PLABs are allocated using a similar paths as oops, but should - // never be in a humongous region - // * Allowing humongous PLABs needlessly churns the region free lists - return MIN2(_humongous_object_threshold_in_words, gclab_word_size); -} - void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) { _drain_in_progress = false; set_evac_failure_closure(cl); @@ -4405,35 +4443,6 @@ void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) { } } -HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose, - size_t word_size, - AllocationContext_t context) { - if (purpose == GCAllocForSurvived) { - HeapWord* result = survivor_attempt_allocation(word_size, context); - if (result != NULL) { - return result; - } else { - // Let's try to allocate in the old gen in case we can fit the - // object there. - return old_attempt_allocation(word_size, context); - } - } else { - assert(purpose == GCAllocForTenured, "sanity"); - HeapWord* result = old_attempt_allocation(word_size, context); - if (result != NULL) { - return result; - } else { - // Let's try to allocate in the survivors in case we can fit the - // object there. - return survivor_attempt_allocation(word_size, context); - } - } - - ShouldNotReachHere(); - // Trying to keep some compilers happy. - return NULL; -} - void G1ParCopyHelper::mark_object(oop obj) { assert(!_g1->heap_region_containing(obj)->in_collection_set(), "should not mark objects in the CSet"); @@ -4476,14 +4485,14 @@ void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) { assert(_worker_id == _par_scan_state->queue_num(), "sanity"); - G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj); - - if (state == G1CollectedHeap::InCSet) { + const InCSetState state = _g1->in_cset_state(obj); + if (state.is_in_cset()) { oop forwardee; - if (obj->is_forwarded()) { - forwardee = obj->forwardee(); + markOop m = obj->mark(); + if (m->is_marked()) { + forwardee = (oop) m->decode_pointer(); } else { - forwardee = _par_scan_state->copy_to_survivor_space(obj); + forwardee = _par_scan_state->copy_to_survivor_space(state, obj, m); } assert(forwardee != NULL, "forwardee should not be NULL"); oopDesc::encode_store_heap_oop(p, forwardee); @@ -4497,7 +4506,7 @@ void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) { do_klass_barrier(p, forwardee); } } else { - if (state == G1CollectedHeap::IsHumongous) { + if (state.is_humongous()) { _g1->set_humongous_is_live(obj); } // The object is not in collection set. If we're a root scanning @@ -4582,60 +4591,11 @@ class G1KlassScanClosure : public KlassClosure { } }; -class G1CodeBlobClosure : public CodeBlobClosure { - class HeapRegionGatheringOopClosure : public OopClosure { - G1CollectedHeap* _g1h; - OopClosure* _work; - nmethod* _nm; - - template <typename T> - void do_oop_work(T* p) { - _work->do_oop(p); - T oop_or_narrowoop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(oop_or_narrowoop)) { - oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop); - HeapRegion* hr = _g1h->heap_region_containing_raw(o); - assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset"); - hr->add_strong_code_root(_nm); - } - } - - public: - HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {} - - void do_oop(oop* o) { - do_oop_work(o); - } - - void do_oop(narrowOop* o) { - do_oop_work(o); - } - - void set_nm(nmethod* nm) { - _nm = nm; - } - }; - - HeapRegionGatheringOopClosure _oc; -public: - G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {} - - void do_code_blob(CodeBlob* cb) { - nmethod* nm = cb->as_nmethod_or_null(); - if (nm != NULL) { - if (!nm->test_set_oops_do_mark()) { - _oc.set_nm(nm); - nm->oops_do(&_oc); - nm->fix_oop_relocations(); - } - } - } -}; - class G1ParTask : public AbstractGangTask { protected: G1CollectedHeap* _g1h; RefToScanQueueSet *_queues; + G1RootProcessor* _root_processor; ParallelTaskTerminator _terminator; uint _n_workers; @@ -4643,10 +4603,11 @@ protected: Mutex* stats_lock() { return &_stats_lock; } public: - G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues) + G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor) : AbstractGangTask("G1 collection"), _g1h(g1h), _queues(task_queues), + _root_processor(root_processor), _terminator(0, _queues), _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) {} @@ -4660,13 +4621,7 @@ public: ParallelTaskTerminator* terminator() { return &_terminator; } virtual void set_for_termination(int active_workers) { - // This task calls set_n_termination() in par_non_clean_card_iterate_work() - // in the young space (_par_seq_tasks) in the G1 heap - // for SequentialSubTasksDone. - // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap - // both of which need setting by set_n_termination(). - _g1h->SharedHeap::set_n_termination(active_workers); - _g1h->set_n_termination(active_workers); + _root_processor->set_num_workers(active_workers); terminator()->reset_for_reuse(active_workers); _n_workers = active_workers; } @@ -4703,8 +4658,7 @@ public: void work(uint worker_id) { if (worker_id >= _n_workers) return; // no work needed this round - double start_time_ms = os::elapsedTime() * 1000.0; - _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms); + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime()); { ResourceMark rm; @@ -4736,24 +4690,21 @@ public: false, // Process all klasses. true); // Need to claim CLDs. - G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl); - G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl); - // IM Weak code roots are handled later. - OopClosure* strong_root_cl; OopClosure* weak_root_cl; CLDClosure* strong_cld_cl; CLDClosure* weak_cld_cl; - CodeBlobClosure* strong_code_cl; + + bool trace_metadata = false; if (_g1h->g1_policy()->during_initial_mark_pause()) { // We also need to mark copied objects. strong_root_cl = &scan_mark_root_cl; strong_cld_cl = &scan_mark_cld_cl; - strong_code_cl = &scan_mark_code_cl; if (ClassUnloadingWithConcurrentMark) { weak_root_cl = &scan_mark_weak_root_cl; weak_cld_cl = &scan_mark_weak_cld_cl; + trace_metadata = true; } else { weak_root_cl = &scan_mark_root_cl; weak_cld_cl = &scan_mark_cld_cl; @@ -4763,31 +4714,32 @@ public: weak_root_cl = &scan_only_root_cl; strong_cld_cl = &scan_only_cld_cl; weak_cld_cl = &scan_only_cld_cl; - strong_code_cl = &scan_only_code_cl; } - - G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); - pss.start_strong_roots(); - _g1h->g1_process_roots(strong_root_cl, - weak_root_cl, - &push_heap_rs_cl, - strong_cld_cl, - weak_cld_cl, - strong_code_cl, - worker_id); + _root_processor->evacuate_roots(strong_root_cl, + weak_root_cl, + strong_cld_cl, + weak_cld_cl, + trace_metadata, + worker_id); + + G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); + _root_processor->scan_remembered_sets(&push_heap_rs_cl, + weak_root_cl, + worker_id); pss.end_strong_roots(); { double start = os::elapsedTime(); G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); evac.do_void(); - double elapsed_ms = (os::elapsedTime()-start)*1000.0; - double term_ms = pss.term_time()*1000.0; - _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms); - _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts()); + double elapsed_sec = os::elapsedTime() - start; + double term_sec = pss.term_time(); + _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); + _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts()); } _g1h->g1_policy()->record_thread_age_table(pss.age_table()); _g1h->update_surviving_young_words(pss.surviving_young_words()+1); @@ -4803,100 +4755,10 @@ public: // destructors are executed here and are included as part of the // "GC Worker Time". } - - double end_time_ms = os::elapsedTime() * 1000.0; - _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms); + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime()); } }; -// *** Common G1 Evacuation Stuff - -// This method is run in a GC worker. - -void -G1CollectedHeap:: -g1_process_roots(OopClosure* scan_non_heap_roots, - OopClosure* scan_non_heap_weak_roots, - OopsInHeapRegionClosure* scan_rs, - CLDClosure* scan_strong_clds, - CLDClosure* scan_weak_clds, - CodeBlobClosure* scan_strong_code, - uint worker_i) { - - // First scan the shared roots. - double ext_roots_start = os::elapsedTime(); - double closure_app_time_sec = 0.0; - - bool during_im = _g1h->g1_policy()->during_initial_mark_pause(); - bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark; - - BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); - BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots); - - process_roots(false, // no scoping; this is parallel code - SharedHeap::SO_None, - &buf_scan_non_heap_roots, - &buf_scan_non_heap_weak_roots, - scan_strong_clds, - // Unloading Initial Marks handle the weak CLDs separately. - (trace_metadata ? NULL : scan_weak_clds), - scan_strong_code); - - // Now the CM ref_processor roots. - if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { - // We need to treat the discovered reference lists of the - // concurrent mark ref processor as roots and keep entries - // (which are added by the marking threads) on them live - // until they can be processed at the end of marking. - ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots); - } - - if (trace_metadata) { - // Barrier to make sure all workers passed - // the strong CLD and strong nmethods phases. - active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads()); - - // Now take the complement of the strong CLDs. - ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds); - } - - // Finish up any enqueued closure apps (attributed as object copy time). - buf_scan_non_heap_roots.done(); - buf_scan_non_heap_weak_roots.done(); - - double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds() - + buf_scan_non_heap_weak_roots.closure_app_seconds(); - - g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); - - double ext_root_time_ms = - ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0; - - g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms); - - // During conc marking we have to filter the per-thread SATB buffers - // to make sure we remove any oops into the CSet (which will show up - // as implicitly live). - double satb_filtering_ms = 0.0; - if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) { - if (mark_in_progress()) { - double satb_filter_start = os::elapsedTime(); - - JavaThread::satb_mark_queue_set().filter_thread_buffers(); - - satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0; - } - } - g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms); - - // Now scan the complement of the collection set. - G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots); - - g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i); - - _process_strong_tasks->all_tasks_completed(); -} - class G1StringSymbolTableUnlinkTask : public AbstractGangTask { private: BoolObjectClosure* _is_alive; @@ -5184,12 +5046,8 @@ class G1KlassCleaningTask : public StackObj { public: void clean_klass(InstanceKlass* ik) { - ik->clean_implementors_list(_is_alive); - ik->clean_method_data(_is_alive); + ik->clean_weak_instanceklass_links(_is_alive); - // G1 specific cleanup work that has - // been moved here to be done in parallel. - ik->clean_dependent_nmethods(); if (JvmtiExport::has_redefined_a_class()) { InstanceKlass::purge_previous_versions(ik); } @@ -5310,7 +5168,8 @@ class G1RedirtyLoggedCardsTask : public AbstractGangTask { G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { } virtual void work(uint worker_id) { - double start_time = os::elapsedTime(); + G1GCPhaseTimes* phase_times = G1CollectedHeap::heap()->g1_policy()->phase_times(); + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::RedirtyCards, worker_id); RedirtyLoggedCardTableEntryClosure cl; if (G1CollectedHeap::heap()->use_parallel_gc_threads()) { @@ -5319,9 +5178,7 @@ class G1RedirtyLoggedCardsTask : public AbstractGangTask { _queue->apply_closure_to_all_completed_buffers(&cl); } - G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times(); - timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0); - timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed()); + phase_times->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_processed()); } }; @@ -5382,17 +5239,17 @@ public: oop obj = *p; assert(obj != NULL, "the caller should have filtered out NULL values"); - G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj); - if (cset_state == G1CollectedHeap::InNeither) { + const InCSetState cset_state = _g1->in_cset_state(obj); + if (!cset_state.is_in_cset_or_humongous()) { return; } - if (cset_state == G1CollectedHeap::InCSet) { + if (cset_state.is_in_cset()) { assert( obj->is_forwarded(), "invariant" ); *p = obj->forwardee(); } else { assert(!obj->is_forwarded(), "invariant" ); - assert(cset_state == G1CollectedHeap::IsHumongous, - err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state)); + assert(cset_state.is_humongous(), + err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state.value())); _g1->set_humongous_is_live(obj); } } @@ -5885,7 +5742,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { n_workers = 1; } - G1ParTask g1_par_task(this, _task_queues); init_for_evac_failure(NULL); @@ -5896,7 +5752,8 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { double end_par_time_sec; { - StrongRootsScope srs(this); + G1RootProcessor root_processor(this); + G1ParTask g1_par_task(this, _task_queues, &root_processor); // InitialMark needs claim bits to keep track of the marked-through CLDs. if (g1_policy()->during_initial_mark_pause()) { ClassLoaderDataGraph::clear_claimed_marks(); @@ -5917,18 +5774,20 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { end_par_time_sec = os::elapsedTime(); // Closing the inner scope will execute the destructor - // for the StrongRootsScope object. We record the current + // for the G1RootProcessor object. We record the current // elapsed time before closing the scope so that time - // taken for the SRS destructor is NOT included in the + // taken for the destructor is NOT included in the // reported parallel time. } + G1GCPhaseTimes* phase_times = g1_policy()->phase_times(); + double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0; - g1_policy()->phase_times()->record_par_time(par_time_ms); + phase_times->record_par_time(par_time_ms); double code_root_fixup_time_ms = (os::elapsedTime() - end_par_time_sec) * 1000.0; - g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms); + phase_times->record_code_root_fixup_time(code_root_fixup_time_ms); set_par_threads(0); @@ -5939,14 +5798,15 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { // not copied during the pause. process_discovered_references(n_workers); - // Weak root processing. - { + if (G1StringDedup::is_enabled()) { + double fixup_start = os::elapsedTime(); + G1STWIsAliveClosure is_alive(this); G1KeepAliveClosure keep_alive(this); - JNIHandles::weak_oops_do(&is_alive, &keep_alive); - if (G1StringDedup::is_enabled()) { - G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive); - } + G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, phase_times); + + double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0; + phase_times->record_string_dedup_fixup_time(fixup_time_ms); } _allocator->release_gc_alloc_regions(n_workers, evacuation_info); @@ -6214,6 +6074,70 @@ void G1CollectedHeap::check_bitmaps(const char* caller) { heap_region_iterate(&cl); guarantee(!cl.failures(), "bitmap verification"); } + +class G1CheckCSetFastTableClosure : public HeapRegionClosure { + private: + bool _failures; + public: + G1CheckCSetFastTableClosure() : HeapRegionClosure(), _failures(false) { } + + virtual bool doHeapRegion(HeapRegion* hr) { + uint i = hr->hrm_index(); + InCSetState cset_state = (InCSetState) G1CollectedHeap::heap()->_in_cset_fast_test.get_by_index(i); + if (hr->isHumongous()) { + if (hr->in_collection_set()) { + gclog_or_tty->print_cr("\n## humongous region %u in CSet", i); + _failures = true; + return true; + } + if (cset_state.is_in_cset()) { + gclog_or_tty->print_cr("\n## inconsistent cset state %d for humongous region %u", cset_state.value(), i); + _failures = true; + return true; + } + if (hr->continuesHumongous() && cset_state.is_humongous()) { + gclog_or_tty->print_cr("\n## inconsistent cset state %d for continues humongous region %u", cset_state.value(), i); + _failures = true; + return true; + } + } else { + if (cset_state.is_humongous()) { + gclog_or_tty->print_cr("\n## inconsistent cset state %d for non-humongous region %u", cset_state.value(), i); + _failures = true; + return true; + } + if (hr->in_collection_set() != cset_state.is_in_cset()) { + gclog_or_tty->print_cr("\n## in CSet %d / cset state %d inconsistency for region %u", + hr->in_collection_set(), cset_state.value(), i); + _failures = true; + return true; + } + if (cset_state.is_in_cset()) { + if (hr->is_young() != (cset_state.is_young())) { + gclog_or_tty->print_cr("\n## is_young %d / cset state %d inconsistency for region %u", + hr->is_young(), cset_state.value(), i); + _failures = true; + return true; + } + if (hr->is_old() != (cset_state.is_old())) { + gclog_or_tty->print_cr("\n## is_old %d / cset state %d inconsistency for region %u", + hr->is_old(), cset_state.value(), i); + _failures = true; + return true; + } + } + } + return false; + } + + bool failures() const { return _failures; } +}; + +bool G1CollectedHeap::check_cset_fast_test() { + G1CheckCSetFastTableClosure cl; + _hrm.iterate(&cl); + return !cl.failures(); +} #endif // PRODUCT void G1CollectedHeap::cleanUpCardTable() { @@ -6409,47 +6333,47 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure { // are completely up-to-date wrt to references to the humongous object. // // Other implementation considerations: - // - never consider object arrays: while they are a valid target, they have not - // been observed to be used as temporary objects. - // - they would also pose considerable effort for cleaning up the the remembered - // sets. - // While this cleanup is not strictly necessary to be done (or done instantly), - // given that their occurrence is very low, this saves us this additional - // complexity. + // - never consider object arrays at this time because they would pose + // considerable effort for cleaning up the the remembered sets. This is + // required because stale remembered sets might reference locations that + // are currently allocated into. uint region_idx = r->hrm_index(); - if (g1h->humongous_is_live(region_idx) || - g1h->humongous_region_is_always_live(region_idx)) { + if (!g1h->is_humongous_reclaim_candidate(region_idx) || + !r->rem_set()->is_empty()) { - if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) { - gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d", - r->isHumongous(), + if (G1TraceEagerReclaimHumongousObjects) { + gclog_or_tty->print_cr("Live humongous region %u size "SIZE_FORMAT" start "PTR_FORMAT" length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d reclaim candidate %d type array %d", region_idx, + obj->size()*HeapWordSize, + r->bottom(), + r->region_num(), r->rem_set()->occupied(), r->rem_set()->strong_code_roots_list_length(), next_bitmap->isMarked(r->bottom()), - g1h->humongous_is_live(region_idx), - obj->is_objArray() + g1h->is_humongous_reclaim_candidate(region_idx), + obj->is_typeArray() ); } return false; } - guarantee(!obj->is_objArray(), - err_msg("Eagerly reclaiming object arrays is not supported, but the object "PTR_FORMAT" is.", + guarantee(obj->is_typeArray(), + err_msg("Only eagerly reclaiming type arrays is supported, but the object " + PTR_FORMAT " is not.", r->bottom())); - if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) { - gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d", - r->isHumongous(), - r->bottom(), + if (G1TraceEagerReclaimHumongousObjects) { + gclog_or_tty->print_cr("Dead humongous region %u size "SIZE_FORMAT" start "PTR_FORMAT" length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d reclaim candidate %d type array %d", region_idx, + obj->size()*HeapWordSize, + r->bottom(), r->region_num(), r->rem_set()->occupied(), r->rem_set()->strong_code_roots_list_length(), next_bitmap->isMarked(r->bottom()), - g1h->humongous_is_live(region_idx), - obj->is_objArray() + g1h->is_humongous_reclaim_candidate(region_idx), + obj->is_typeArray() ); } // Need to clear mark bit of the humongous object if already set. @@ -6480,7 +6404,8 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure { void G1CollectedHeap::eagerly_reclaim_humongous_regions() { assert_at_safepoint(true); - if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) { + if (!G1EagerReclaimHumongousObjects || + (!_has_humongous_reclaim_candidates && !G1TraceEagerReclaimHumongousObjects)) { g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0); return; } @@ -6793,20 +6718,20 @@ void G1CollectedHeap::set_par_threads() { HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, uint count, - GCAllocPurpose ap) { + InCSetState dest) { assert(FreeList_lock->owned_by_self(), "pre-condition"); - if (count < g1_policy()->max_regions(ap)) { - bool survivor = (ap == GCAllocForSurvived); + if (count < g1_policy()->max_regions(dest)) { + const bool is_survivor = (dest.is_young()); HeapRegion* new_alloc_region = new_region(word_size, - !survivor, + !is_survivor, true /* do_expand */); if (new_alloc_region != NULL) { // We really only need to do this for old regions given that we // should never scan survivors. But it doesn't hurt to do it // for survivors too. - new_alloc_region->record_top_and_timestamp(); - if (survivor) { + new_alloc_region->record_timestamp(); + if (is_survivor) { new_alloc_region->set_survivor(); _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor); check_bitmaps("Survivor Region Allocation", new_alloc_region); @@ -6818,8 +6743,6 @@ HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, bool during_im = g1_policy()->during_initial_mark_pause(); new_alloc_region->note_start_of_copying(during_im); return new_alloc_region; - } else { - g1_policy()->note_alloc_region_limit_reached(ap); } } return NULL; @@ -6827,11 +6750,11 @@ HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region, size_t allocated_bytes, - GCAllocPurpose ap) { + InCSetState dest) { bool during_im = g1_policy()->during_initial_mark_pause(); alloc_region->note_end_of_copying(during_im); g1_policy()->record_bytes_copied_during_gc(allocated_bytes); - if (ap == GCAllocForSurvived) { + if (dest.is_young()) { young_list()->add_survivor_region(alloc_region); } else { _old_set.add(alloc_region); diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 6aaa9dd5f..500390af1 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,6 +32,7 @@ #include "gc_implementation/g1/g1AllocRegion.hpp" #include "gc_implementation/g1/g1BiasedArray.hpp" #include "gc_implementation/g1/g1HRPrinter.hpp" +#include "gc_implementation/g1/g1InCSetState.hpp" #include "gc_implementation/g1/g1MonitoringSupport.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #include "gc_implementation/g1/g1YCTypes.hpp" @@ -212,6 +213,9 @@ class G1CollectedHeap : public SharedHeap { // Other related classes. friend class G1MarkSweep; + // Testing classes. + friend class G1CheckCSetFastTableClosure; + private: // The one and only G1CollectedHeap, so static functions can find it. static G1CollectedHeap* _g1h; @@ -229,7 +233,6 @@ private: // It keeps track of the humongous regions. HeapRegionSet _humongous_set; - void clear_humongous_is_live_table(); void eagerly_reclaim_humongous_regions(); // The number of regions we could create by expansion. @@ -299,22 +302,26 @@ private: // Helper for monitoring and management support. G1MonitoringSupport* _g1mm; - // Records whether the region at the given index is kept live by roots or - // references from the young generation. - class HumongousIsLiveBiasedMappedArray : public G1BiasedMappedArray<bool> { + // Records whether the region at the given index is (still) a + // candidate for eager reclaim. Only valid for humongous start + // regions; other regions have unspecified values. Humongous start + // regions are initialized at start of collection pause, with + // candidates removed from the set as they are found reachable from + // roots or the young generation. + class HumongousReclaimCandidates : public G1BiasedMappedArray<bool> { protected: bool default_value() const { return false; } public: void clear() { G1BiasedMappedArray<bool>::clear(); } - void set_live(uint region) { - set_by_index(region, true); + void set_candidate(uint region, bool value) { + set_by_index(region, value); } - bool is_live(uint region) { + bool is_candidate(uint region) { return get_by_index(region); } }; - HumongousIsLiveBiasedMappedArray _humongous_is_live; + HumongousReclaimCandidates _humongous_reclaim_candidates; // Stores whether during humongous object registration we found candidate regions. // If not, we can skip a few steps. bool _has_humongous_reclaim_candidates; @@ -339,13 +346,14 @@ private: // Keeps track of how many "old marking cycles" (i.e., Full GCs or // concurrent cycles) we have started. - volatile unsigned int _old_marking_cycles_started; + volatile uint _old_marking_cycles_started; // Keeps track of how many "old marking cycles" (i.e., Full GCs or // concurrent cycles) we have completed. - volatile unsigned int _old_marking_cycles_completed; + volatile uint _old_marking_cycles_completed; bool _concurrent_cycle_started; + bool _heap_summary_sent; // This is a non-product method that is helpful for testing. It is // called at the end of a GC and artificially expands the heap by @@ -362,6 +370,12 @@ private: // heap after a compaction. void print_hrm_post_compaction(); + // Create a memory mapper for auxiliary data structures of the given size and + // translation factor. + static G1RegionToSpaceMapper* create_aux_memory_mapper(const char* description, + size_t size, + size_t translation_factor); + double verify(bool guard, const char* msg); void verify_before_gc(); void verify_after_gc(); @@ -510,22 +524,22 @@ protected: // the mutator alloc region without taking the Heap_lock. This // should only be used for non-humongous allocations. inline HeapWord* attempt_allocation(size_t word_size, - unsigned int* gc_count_before_ret, - int* gclocker_retry_count_ret); + uint* gc_count_before_ret, + uint* gclocker_retry_count_ret); // Second-level mutator allocation attempt: take the Heap_lock and // retry the allocation attempt, potentially scheduling a GC // pause. This should only be used for non-humongous allocations. HeapWord* attempt_allocation_slow(size_t word_size, AllocationContext_t context, - unsigned int* gc_count_before_ret, - int* gclocker_retry_count_ret); + uint* gc_count_before_ret, + uint* gclocker_retry_count_ret); // Takes the Heap_lock and attempts a humongous allocation. It can // potentially schedule a GC pause. HeapWord* attempt_allocation_humongous(size_t word_size, - unsigned int* gc_count_before_ret, - int* gclocker_retry_count_ret); + uint* gc_count_before_ret, + uint* gclocker_retry_count_ret); // Allocation attempt that should be called during safepoints (e.g., // at the end of a successful GC). expect_null_mutator_alloc_region @@ -545,15 +559,9 @@ protected: // allocation region, either by picking one or expanding the // heap, and then allocate a block of the given size. The block // may not be a humongous - it must fit into a single heap region. - HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, - size_t word_size, - AllocationContext_t context); - - HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose, - HeapRegion* alloc_region, - bool par, - size_t word_size); - + inline HeapWord* par_allocate_during_gc(InCSetState dest, + size_t word_size, + AllocationContext_t context); // Ensure that no further allocations can happen in "r", bearing in mind // that parallel threads might be attempting allocations. void par_allocate_remaining_space(HeapRegion* r); @@ -575,9 +583,9 @@ protected: // For GC alloc regions. HeapRegion* new_gc_alloc_region(size_t word_size, uint count, - GCAllocPurpose ap); + InCSetState dest); void retire_gc_alloc_region(HeapRegion* alloc_region, - size_t allocated_bytes, GCAllocPurpose ap); + size_t allocated_bytes, InCSetState dest); // - if explicit_gc is true, the GC is for a System.gc() or a heap // inspection request and should collect the entire heap @@ -638,26 +646,11 @@ public: // (Rounds up to a HeapRegion boundary.) bool expand(size_t expand_bytes); - // Returns the PLAB statistics given a purpose. - PLABStats* stats_for_purpose(GCAllocPurpose purpose) { - PLABStats* stats = NULL; - - switch (purpose) { - case GCAllocForSurvived: - stats = &_survivor_plab_stats; - break; - case GCAllocForTenured: - stats = &_old_plab_stats; - break; - default: - assert(false, "unrecognized GCAllocPurpose"); - } - - return stats; - } + // Returns the PLAB statistics for a given destination. + inline PLABStats* alloc_buffer_stats(InCSetState dest); - // Determines PLAB size for a particular allocation purpose. - size_t desired_plab_sz(GCAllocPurpose purpose); + // Determines PLAB size for a given destination. + inline size_t desired_plab_sz(InCSetState dest); inline AllocationContextStats& allocation_context_stats(); @@ -665,15 +658,15 @@ public: virtual void gc_prologue(bool full); virtual void gc_epilogue(bool full); - inline void set_humongous_is_live(oop obj); + // Modify the reclaim candidate set and test for presence. + // These are only valid for starts_humongous regions. + inline void set_humongous_reclaim_candidate(uint region, bool value); + inline bool is_humongous_reclaim_candidate(uint region); - bool humongous_is_live(uint region) { - return _humongous_is_live.is_live(region); - } + // Remove from the reclaim candidate set. Also remove from the + // collection set so that later encounters avoid the slow path. + inline void set_humongous_is_live(oop obj); - // Returns whether the given region (which must be a humongous (start) region) - // is to be considered conservatively live regardless of any other conditions. - bool humongous_region_is_always_live(uint index); // Register the given region to be part of the collection set. inline void register_humongous_region_with_in_cset_fast_test(uint index); // Register regions with humongous objects (actually on the start region) in @@ -681,8 +674,11 @@ public: void register_humongous_regions_with_in_cset_fast_test(); // We register a region with the fast "in collection set" test. We // simply set to true the array slot corresponding to this region. - void register_region_with_in_cset_fast_test(HeapRegion* r) { - _in_cset_fast_test.set_in_cset(r->hrm_index()); + void register_young_region_with_in_cset_fast_test(HeapRegion* r) { + _in_cset_fast_test.set_in_young(r->hrm_index()); + } + void register_old_region_with_in_cset_fast_test(HeapRegion* r) { + _in_cset_fast_test.set_in_old(r->hrm_index()); } // This is a fast test on whether a reference points into the @@ -714,7 +710,7 @@ public: // +ExplicitGCInvokesConcurrent). void increment_old_marking_cycles_completed(bool concurrent); - unsigned int old_marking_cycles_completed() { + uint old_marking_cycles_completed() { return _old_marking_cycles_completed; } @@ -773,7 +769,7 @@ protected: // methods that call do_collection_pause() release the Heap_lock // before the call, so it's easy to read gc_count_before just before. HeapWord* do_collection_pause(size_t word_size, - unsigned int gc_count_before, + uint gc_count_before, bool* succeeded, GCCause::Cause gc_cause); @@ -812,22 +808,6 @@ protected: // statistics or updating free lists. void abandon_collection_set(HeapRegion* cs_head); - // Applies "scan_non_heap_roots" to roots outside the heap, - // "scan_rs" to roots inside the heap (having done "set_region" to - // indicate the region in which the root resides), - // and does "scan_metadata" If "scan_rs" is - // NULL, then this step is skipped. The "worker_i" - // param is for use with parallel roots processing, and should be - // the "i" of the calling parallel worker thread's work(i) function. - // In the sequential case this param will be ignored. - void g1_process_roots(OopClosure* scan_non_heap_roots, - OopClosure* scan_non_heap_weak_roots, - OopsInHeapRegionClosure* scan_rs, - CLDClosure* scan_strong_clds, - CLDClosure* scan_weak_clds, - CodeBlobClosure* scan_strong_code, - uint worker_i); - // The concurrent marker (and the thread it runs in.) ConcurrentMark* _cm; ConcurrentMarkThread* _cmThread; @@ -1012,23 +992,12 @@ protected: // The heap region entry for a given worker is valid iff // the associated time stamp value matches the current value // of G1CollectedHeap::_gc_time_stamp. - unsigned int* _worker_cset_start_region_time_stamp; - - enum G1H_process_roots_tasks { - G1H_PS_filter_satb_buffers, - G1H_PS_refProcessor_oops_do, - // Leave this one last. - G1H_PS_NumElements - }; - - SubTasksDone* _process_strong_tasks; + uint* _worker_cset_start_region_time_stamp; volatile bool _free_regions_coming; public: - SubTasksDone* process_strong_tasks() { return _process_strong_tasks; } - void set_refine_cte_cl_concurrency(bool concurrent); RefToScanQueue *task_queue(int i) const; @@ -1061,21 +1030,11 @@ public: // Initialize weak reference processing. virtual void ref_processing_init(); - void set_par_threads(uint t) { - SharedHeap::set_par_threads(t); - // Done in SharedHeap but oddly there are - // two _process_strong_tasks's in a G1CollectedHeap - // so do it here too. - _process_strong_tasks->set_n_threads(t); - } - + // Explicitly import set_par_threads into this scope + using SharedHeap::set_par_threads; // Set _n_par_threads according to a policy TBD. void set_par_threads(); - void set_n_termination(int t) { - _process_strong_tasks->set_n_threads(t); - } - virtual CollectedHeap::Name kind() const { return CollectedHeap::G1CollectedHeap; } @@ -1150,6 +1109,10 @@ public: // The number of regions that are completely free. uint num_free_regions() const { return _hrm.num_free_regions(); } + MemoryUsage get_auxiliary_data_memory_usage() const { + return _hrm.get_auxiliary_data_memory_usage(); + } + // The number of regions that are not completely free. uint num_used_regions() const { return num_regions() - num_free_regions(); } @@ -1182,6 +1145,9 @@ public: // appropriate error messages and crash. void check_bitmaps(const char* caller) PRODUCT_RETURN; + // Do sanity check on the contents of the in-cset fast test table. + bool check_cset_fast_test() PRODUCT_RETURN_( return true; ); + // verify_region_sets() performs verification over the region // lists. It will be compiled in the product code to be used when // necessary (i.e., during heap verification). @@ -1277,53 +1243,15 @@ public: inline bool is_in_cset_or_humongous(const oop obj); - enum in_cset_state_t { - InNeither, // neither in collection set nor humongous - InCSet, // region is in collection set only - IsHumongous // region is a humongous start region - }; private: - // Instances of this class are used for quick tests on whether a reference points - // into the collection set or is a humongous object (points into a humongous - // object). - // Each of the array's elements denotes whether the corresponding region is in - // the collection set or a humongous region. - // We use this to quickly reclaim humongous objects: by making a humongous region - // succeed this test, we sort-of add it to the collection set. During the reference - // iteration closures, when we see a humongous region, we simply mark it as - // referenced, i.e. live. - class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> { - protected: - char default_value() const { return G1CollectedHeap::InNeither; } - public: - void set_humongous(uintptr_t index) { - assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values"); - set_by_index(index, G1CollectedHeap::IsHumongous); - } - - void clear_humongous(uintptr_t index) { - set_by_index(index, G1CollectedHeap::InNeither); - } - - void set_in_cset(uintptr_t index) { - assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value"); - set_by_index(index, G1CollectedHeap::InCSet); - } - - bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; } - bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; } - G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); } - void clear() { G1BiasedMappedArray<char>::clear(); } - }; - // This array is used for a quick test on whether a reference points into // the collection set or not. Each of the array's elements denotes whether the // corresponding region is in the collection set or not. - G1FastCSetBiasedMappedArray _in_cset_fast_test; + G1InCSetStateFastTestBiasedMappedArray _in_cset_fast_test; public: - inline in_cset_state_t in_cset_state(const oop obj); + inline InCSetState in_cset_state(const oop obj); // Return "TRUE" iff the given object address is in the reserved // region of g1. diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp index 32357bc77..0fd278699 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,41 @@ #include "runtime/orderAccess.inline.hpp" #include "utilities/taskqueue.hpp" +PLABStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) { + switch (dest.value()) { + case InCSetState::Young: + return &_survivor_plab_stats; + case InCSetState::Old: + return &_old_plab_stats; + default: + ShouldNotReachHere(); + return NULL; // Keep some compilers happy + } +} + +size_t G1CollectedHeap::desired_plab_sz(InCSetState dest) { + size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_sz(); + // Prevent humongous PLAB sizes for two reasons: + // * PLABs are allocated using a similar paths as oops, but should + // never be in a humongous region + // * Allowing humongous PLABs needlessly churns the region free lists + return MIN2(_humongous_object_threshold_in_words, gclab_word_size); +} + +HeapWord* G1CollectedHeap::par_allocate_during_gc(InCSetState dest, + size_t word_size, + AllocationContext_t context) { + switch (dest.value()) { + case InCSetState::Young: + return survivor_attempt_allocation(word_size, context); + case InCSetState::Old: + return old_attempt_allocation(word_size, context); + default: + ShouldNotReachHere(); + return NULL; // Keep some compilers happy + } +} + // Inline functions for G1CollectedHeap inline AllocationContextStats& G1CollectedHeap::allocation_context_stats() { @@ -96,8 +131,8 @@ inline bool G1CollectedHeap::obj_in_cs(oop obj) { } inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, - unsigned int* gc_count_before_ret, - int* gclocker_retry_count_ret) { + uint* gc_count_before_ret, + uint* gclocker_retry_count_ret) { assert_heap_not_locked_and_not_at_safepoint(); assert(!isHumongous(word_size), "attempt_allocation() should not " "be called for humongous allocation requests"); @@ -203,7 +238,7 @@ bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) { return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj); } -G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) { +InCSetState G1CollectedHeap::in_cset_state(const oop obj) { return _in_cset_fast_test.at((HeapWord*)obj); } @@ -313,20 +348,30 @@ inline bool G1CollectedHeap::is_obj_ill(const oop obj) const { return is_obj_ill(obj, heap_region_containing(obj)); } +inline void G1CollectedHeap::set_humongous_reclaim_candidate(uint region, bool value) { + assert(_hrm.at(region)->startsHumongous(), "Must start a humongous object"); + _humongous_reclaim_candidates.set_candidate(region, value); +} + +inline bool G1CollectedHeap::is_humongous_reclaim_candidate(uint region) { + assert(_hrm.at(region)->startsHumongous(), "Must start a humongous object"); + return _humongous_reclaim_candidates.is_candidate(region); +} + inline void G1CollectedHeap::set_humongous_is_live(oop obj) { uint region = addr_to_region((HeapWord*)obj); - // We not only set the "live" flag in the humongous_is_live table, but also + // Clear the flag in the humongous_reclaim_candidates table. Also // reset the entry in the _in_cset_fast_test table so that subsequent references // to the same humongous object do not go into the slow path again. // This is racy, as multiple threads may at the same time enter here, but this // is benign. - // During collection we only ever set the "live" flag, and only ever clear the + // During collection we only ever clear the "candidate" flag, and only ever clear the // entry in the in_cset_fast_table. // We only ever evaluate the contents of these tables (in the VM thread) after // having synchronized the worker threads with the VM thread, or in the same // thread (i.e. within the VM thread). - if (!_humongous_is_live.is_live(region)) { - _humongous_is_live.set_live(region); + if (is_humongous_reclaim_candidate(region)) { + set_humongous_reclaim_candidate(region, false); _in_cset_fast_test.clear_humongous(region); } } diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index a9d9543ff..ea80c4492 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -1084,7 +1084,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua if (update_stats) { double cost_per_card_ms = 0.0; if (_pending_cards > 0) { - cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards; + cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards; _cost_per_card_ms_seq->add(cost_per_card_ms); } @@ -1092,7 +1092,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua double cost_per_entry_ms = 0.0; if (cards_scanned > 10) { - cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned; + cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned; if (_last_gc_was_young) { _cost_per_entry_ms_seq->add(cost_per_entry_ms); } else { @@ -1134,7 +1134,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua double cost_per_byte_ms = 0.0; if (copied_bytes > 0) { - cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes; + cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes; if (_in_marking_window) { _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); } else { @@ -1143,8 +1143,8 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua } double all_other_time_ms = pause_time_ms - - (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time() - + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time()); + (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) + + phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination)); double young_other_time_ms = 0.0; if (young_cset_region_length() > 0) { @@ -1185,8 +1185,8 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua // Note that _mmu_tracker->max_gc_time() returns the time in seconds. double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; - adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(), - phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms); + adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS), + phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms); _collectionSetChooser->verify(); } @@ -1437,18 +1437,6 @@ bool G1CollectorPolicy::can_expand_young_list() { return young_list_length < young_list_max_length; } -uint G1CollectorPolicy::max_regions(int purpose) { - switch (purpose) { - case GCAllocForSurvived: - return _max_survivor_regions; - case GCAllocForTenured: - return REGIONS_UNLIMITED; - default: - ShouldNotReachHere(); - return REGIONS_UNLIMITED; - }; -} - void G1CollectorPolicy::update_max_gc_locker_expansion() { uint expansion_region_num = 0; if (GCLockerEdenExpansionPercent > 0) { @@ -1683,7 +1671,7 @@ void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) { hr->set_next_in_collection_set(_collection_set); _collection_set = hr; _collection_set_bytes_used_before += hr->used(); - _g1->register_region_with_in_cset_fast_test(hr); + _g1->register_old_region_with_in_cset_fast_test(hr); size_t rs_length = hr->rem_set()->occupied(); _recorded_rs_lengths += rs_length; _old_cset_region_length += 1; @@ -1816,7 +1804,7 @@ void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) { hr->set_in_collection_set(true); assert( hr->next_in_collection_set() == NULL, "invariant"); - _g1->register_region_with_in_cset_fast_test(hr); + _g1->register_young_region_with_in_cset_fast_test(hr); } // Add the region at the RHS of the incremental cset @@ -2189,19 +2177,19 @@ void TraceGen0TimeData::record_end_collection(double pause_time_ms, G1GCPhaseTim _other.add(pause_time_ms - phase_times->accounted_time_ms()); _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms()); _parallel.add(phase_times->cur_collection_par_time_ms()); - _ext_root_scan.add(phase_times->average_last_ext_root_scan_time()); - _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms()); - _update_rs.add(phase_times->average_last_update_rs_time()); - _scan_rs.add(phase_times->average_last_scan_rs_time()); - _obj_copy.add(phase_times->average_last_obj_copy_time()); - _termination.add(phase_times->average_last_termination_time()); - - double parallel_known_time = phase_times->average_last_ext_root_scan_time() + - phase_times->average_last_satb_filtering_times_ms() + - phase_times->average_last_update_rs_time() + - phase_times->average_last_scan_rs_time() + - phase_times->average_last_obj_copy_time() + - + phase_times->average_last_termination_time(); + _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan)); + _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering)); + _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS)); + _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS)); + _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy)); + _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination)); + + double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) + + phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) + + phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) + + phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) + + phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) + + phase_times->average_time_ms(G1GCPhaseTimes::Termination); double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time; _parallel_other.add(parallel_other_time); diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index 6f9e2a1f5..96e4dc63d 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -877,28 +877,20 @@ private: public: uint tenuring_threshold() const { return _tenuring_threshold; } - inline GCAllocPurpose - evacuation_destination(HeapRegion* src_region, uint age, size_t word_sz) { - if (age < _tenuring_threshold && src_region->is_young()) { - return GCAllocForSurvived; - } else { - return GCAllocForTenured; - } - } - - inline bool track_object_age(GCAllocPurpose purpose) { - return purpose == GCAllocForSurvived; - } - static const uint REGIONS_UNLIMITED = (uint) -1; - uint max_regions(int purpose); - - // The limit on regions for a particular purpose is reached. - void note_alloc_region_limit_reached(int purpose) { - if (purpose == GCAllocForSurvived) { - _tenuring_threshold = 0; + uint max_regions(InCSetState dest) { + switch (dest.value()) { + case InCSetState::Young: + return _max_survivor_regions; + case InCSetState::Old: + return REGIONS_UNLIMITED; + default: + assert(false, err_msg("Unknown dest state: " CSETSTATE_FORMAT, dest.value())); + break; } + // keep some compilers happy + return 0; } void note_start_adding_survivor_regions() { diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp index a13351034..9891e1062 100644 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,12 +22,13 @@ * */ - #include "precompiled.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1GCPhaseTimes.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1StringDedup.hpp" +#include "memory/allocation.hpp" +#include "runtime/os.hpp" // Helper class for avoiding interleaved logging class LineBuffer: public StackObj { @@ -70,184 +71,258 @@ public: va_end(ap); } + void print_cr() { + gclog_or_tty->print_cr("%s", _buffer); + _cur = _indent_level * INDENT_CHARS; + } + void append_and_print_cr(const char* format, ...) ATTRIBUTE_PRINTF(2, 3) { va_list ap; va_start(ap, format); vappend(format, ap); va_end(ap); - gclog_or_tty->print_cr("%s", _buffer); - _cur = _indent_level * INDENT_CHARS; + print_cr(); } }; -PRAGMA_DIAG_PUSH -PRAGMA_FORMAT_NONLITERAL_IGNORED template <class T> -void WorkerDataArray<T>::print(int level, const char* title) { - if (_length == 1) { - // No need for min, max, average and sum for only one worker - LineBuffer buf(level); - buf.append("[%s: ", title); - buf.append(_print_format, _data[0]); - buf.append_and_print_cr("]"); - return; +class WorkerDataArray : public CHeapObj<mtGC> { + friend class G1GCParPhasePrinter; + T* _data; + uint _length; + const char* _title; + bool _print_sum; + int _log_level; + uint _indent_level; + bool _enabled; + + WorkerDataArray<size_t>* _thread_work_items; + + NOT_PRODUCT(T uninitialized();) + + // We are caching the sum and average to only have to calculate them once. + // This is not done in an MT-safe way. It is intended to allow single + // threaded code to call sum() and average() multiple times in any order + // without having to worry about the cost. + bool _has_new_data; + T _sum; + T _min; + T _max; + double _average; + + public: + WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) : + _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level), + _has_new_data(true), _thread_work_items(NULL), _enabled(true) { + assert(length > 0, "Must have some workers to store data for"); + _length = length; + _data = NEW_C_HEAP_ARRAY(T, _length, mtGC); } - T min = _data[0]; - T max = _data[0]; - T sum = 0; + ~WorkerDataArray() { + FREE_C_HEAP_ARRAY(T, _data, mtGC); + } - LineBuffer buf(level); - buf.append("[%s:", title); - for (uint i = 0; i < _length; ++i) { - T val = _data[i]; - min = MIN2(val, min); - max = MAX2(val, max); - sum += val; - if (G1Log::finest()) { - buf.append(" "); - buf.append(_print_format, val); + void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) { + _thread_work_items = thread_work_items; + } + + WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; } + + void set(uint worker_i, T value) { + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); + assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title)); + _data[worker_i] = value; + _has_new_data = true; + } + + void set_thread_work_item(uint worker_i, size_t value) { + assert(_thread_work_items != NULL, "No sub count"); + _thread_work_items->set(worker_i, value); + } + + T get(uint worker_i) { + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); + assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i)); + return _data[worker_i]; + } + + void add(uint worker_i, T value) { + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); + assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i)); + _data[worker_i] += value; + _has_new_data = true; + } + + double average(){ + calculate_totals(); + return _average; + } + + T sum() { + calculate_totals(); + return _sum; + } + + T minimum() { + calculate_totals(); + return _min; + } + + T maximum() { + calculate_totals(); + return _max; + } + + void reset() PRODUCT_RETURN; + void verify() PRODUCT_RETURN; + + void set_enabled(bool enabled) { _enabled = enabled; } + + int log_level() { return _log_level; } + + private: + + void calculate_totals(){ + if (!_has_new_data) { + return; + } + + _sum = (T)0; + _min = _data[0]; + _max = _min; + for (uint i = 0; i < _length; ++i) { + T val = _data[i]; + _sum += val; + _min = MIN2(_min, val); + _max = MAX2(_max, val); } + _average = (double)_sum / (double)_length; + _has_new_data = false; } +}; - if (G1Log::finest()) { - buf.append_and_print_cr("%s", ""); - } - - double avg = (double)sum / (double)_length; - buf.append(" Min: "); - buf.append(_print_format, min); - buf.append(", Avg: "); - buf.append("%.1lf", avg); // Always print average as a double - buf.append(", Max: "); - buf.append(_print_format, max); - buf.append(", Diff: "); - buf.append(_print_format, max - min); - if (_print_sum) { - // for things like the start and end times the sum is not - // that relevant - buf.append(", Sum: "); - buf.append(_print_format, sum); - } - buf.append_and_print_cr("]"); -} -PRAGMA_DIAG_POP #ifndef PRODUCT -template <> const int WorkerDataArray<int>::_uninitialized = -1; -template <> const double WorkerDataArray<double>::_uninitialized = -1.0; -template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1; +template <> +size_t WorkerDataArray<size_t>::uninitialized() { + return (size_t)-1; +} + +template <> +double WorkerDataArray<double>::uninitialized() { + return -1.0; +} template <class T> void WorkerDataArray<T>::reset() { for (uint i = 0; i < _length; i++) { - _data[i] = (T)_uninitialized; + _data[i] = WorkerDataArray<T>::uninitialized(); + } + if (_thread_work_items != NULL) { + _thread_work_items->reset(); } } template <class T> void WorkerDataArray<T>::verify() { + if (!_enabled) { + return; + } + for (uint i = 0; i < _length; i++) { - assert(_data[i] != _uninitialized, - err_msg("Invalid data for worker " UINT32_FORMAT ", data: %lf, uninitialized: %lf", - i, (double)_data[i], (double)_uninitialized)); + assert(_data[i] != WorkerDataArray<T>::uninitialized(), + err_msg("Invalid data for worker %u in '%s'", i, _title)); + } + if (_thread_work_items != NULL) { + _thread_work_items->verify(); } } #endif G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) : - _max_gc_threads(max_gc_threads), - _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false), - _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"), - _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"), - _last_update_rs_times_ms(_max_gc_threads, "%.1lf"), - _last_update_rs_processed_buffers(_max_gc_threads, "%d"), - _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"), - _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"), - _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"), - _last_termination_times_ms(_max_gc_threads, "%.1lf"), - _last_termination_attempts(_max_gc_threads, SIZE_FORMAT), - _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false), - _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"), - _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"), - _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"), - _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT), - _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"), - _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf") + _max_gc_threads(max_gc_threads) { assert(max_gc_threads > 0, "Must have some GC threads"); + + _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2); + _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2); + + // Root scanning phases + _gc_par_phases[ThreadRoots] = new WorkerDataArray<double>(max_gc_threads, "Thread Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[StringTableRoots] = new WorkerDataArray<double>(max_gc_threads, "StringTable Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[UniverseRoots] = new WorkerDataArray<double>(max_gc_threads, "Universe Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[JNIRoots] = new WorkerDataArray<double>(max_gc_threads, "JNI Handles Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[ObjectSynchronizerRoots] = new WorkerDataArray<double>(max_gc_threads, "ObjectSynchronizer Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[FlatProfilerRoots] = new WorkerDataArray<double>(max_gc_threads, "FlatProfiler Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[ManagementRoots] = new WorkerDataArray<double>(max_gc_threads, "Management Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[SystemDictionaryRoots] = new WorkerDataArray<double>(max_gc_threads, "SystemDictionary Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[CLDGRoots] = new WorkerDataArray<double>(max_gc_threads, "CLDG Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[JVMTIRoots] = new WorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[CodeCacheRoots] = new WorkerDataArray<double>(max_gc_threads, "CodeCache Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[CMRefRoots] = new WorkerDataArray<double>(max_gc_threads, "CM RefProcessor Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[WaitForStrongCLD] = new WorkerDataArray<double>(max_gc_threads, "Wait For Strong CLD (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[WeakCLDRoots] = new WorkerDataArray<double>(max_gc_threads, "Weak CLD Roots (ms)", true, G1Log::LevelFinest, 3); + _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFinest, 3); + + _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2); + _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2); + + _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3); + _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers); + + _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3); + _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts); + + _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2); + + _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3); + _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3); + _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards); } -void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) { +void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) { assert(active_gc_threads > 0, "The number of threads must be > 0"); - assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads"); + assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads"); _active_gc_threads = active_gc_threads; - _last_gc_worker_start_times_ms.reset(); - _last_ext_root_scan_times_ms.reset(); - _last_satb_filtering_times_ms.reset(); - _last_update_rs_times_ms.reset(); - _last_update_rs_processed_buffers.reset(); - _last_scan_rs_times_ms.reset(); - _last_strong_code_root_scan_times_ms.reset(); - _last_obj_copy_times_ms.reset(); - _last_termination_times_ms.reset(); - _last_termination_attempts.reset(); - _last_gc_worker_end_times_ms.reset(); - _last_gc_worker_times_ms.reset(); - _last_gc_worker_other_times_ms.reset(); - - _last_redirty_logged_cards_time_ms.reset(); - _last_redirty_logged_cards_processed_cards.reset(); + for (int i = 0; i < GCParPhasesSentinel; i++) { + _gc_par_phases[i]->reset(); + } + _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled()); + _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled()); } void G1GCPhaseTimes::note_gc_end() { - _last_gc_worker_start_times_ms.verify(); - _last_ext_root_scan_times_ms.verify(); - _last_satb_filtering_times_ms.verify(); - _last_update_rs_times_ms.verify(); - _last_update_rs_processed_buffers.verify(); - _last_scan_rs_times_ms.verify(); - _last_strong_code_root_scan_times_ms.verify(); - _last_obj_copy_times_ms.verify(); - _last_termination_times_ms.verify(); - _last_termination_attempts.verify(); - _last_gc_worker_end_times_ms.verify(); - for (uint i = 0; i < _active_gc_threads; i++) { - double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i); - _last_gc_worker_times_ms.set(i, worker_time); - - double worker_known_time = _last_ext_root_scan_times_ms.get(i) + - _last_satb_filtering_times_ms.get(i) + - _last_update_rs_times_ms.get(i) + - _last_scan_rs_times_ms.get(i) + - _last_strong_code_root_scan_times_ms.get(i) + - _last_obj_copy_times_ms.get(i) + - _last_termination_times_ms.get(i); - - double worker_other_time = worker_time - worker_known_time; - _last_gc_worker_other_times_ms.set(i, worker_other_time); + double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i); + record_time_secs(GCWorkerTotal, i , worker_time); + + double worker_known_time = + _gc_par_phases[ExtRootScan]->get(i) + + _gc_par_phases[SATBFiltering]->get(i) + + _gc_par_phases[UpdateRS]->get(i) + + _gc_par_phases[ScanRS]->get(i) + + _gc_par_phases[CodeRoots]->get(i) + + _gc_par_phases[ObjCopy]->get(i) + + _gc_par_phases[Termination]->get(i); + + record_time_secs(Other, i, worker_time - worker_known_time); } - _last_gc_worker_times_ms.verify(); - _last_gc_worker_other_times_ms.verify(); - - _last_redirty_logged_cards_time_ms.verify(); - _last_redirty_logged_cards_processed_cards.verify(); -} - -void G1GCPhaseTimes::note_string_dedup_fixup_start() { - _cur_string_dedup_queue_fixup_worker_times_ms.reset(); - _cur_string_dedup_table_fixup_worker_times_ms.reset(); -} - -void G1GCPhaseTimes::note_string_dedup_fixup_end() { - _cur_string_dedup_queue_fixup_worker_times_ms.verify(); - _cur_string_dedup_table_fixup_worker_times_ms.verify(); + for (int i = 0; i < GCParPhasesSentinel; i++) { + _gc_par_phases[i]->verify(); + } } void G1GCPhaseTimes::print_stats(int level, const char* str, double value) { @@ -259,7 +334,7 @@ void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) { } void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) { - LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers); + LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers); } double G1GCPhaseTimes::accounted_time_ms() { @@ -287,46 +362,172 @@ double G1GCPhaseTimes::accounted_time_ms() { return misc_time_ms; } -void G1GCPhaseTimes::print(double pause_time_sec) { - if (_root_region_scan_wait_time_ms > 0.0) { - print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms); +// record the time a phase took in seconds +void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) { + _gc_par_phases[phase]->set(worker_i, secs); +} + +// add a number of seconds to a phase +void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) { + _gc_par_phases[phase]->add(worker_i, secs); +} + +void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) { + _gc_par_phases[phase]->set_thread_work_item(worker_i, count); +} + +// return the average time for a phase in milliseconds +double G1GCPhaseTimes::average_time_ms(GCParPhases phase) { + return _gc_par_phases[phase]->average() * 1000.0; +} + +double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) { + return _gc_par_phases[phase]->get(worker_i) * 1000.0; +} + +double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) { + return _gc_par_phases[phase]->sum() * 1000.0; +} + +double G1GCPhaseTimes::min_time_ms(GCParPhases phase) { + return _gc_par_phases[phase]->minimum() * 1000.0; +} + +double G1GCPhaseTimes::max_time_ms(GCParPhases phase) { + return _gc_par_phases[phase]->maximum() * 1000.0; +} + +size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) { + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); + return _gc_par_phases[phase]->thread_work_items()->get(worker_i); +} + +size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) { + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); + return _gc_par_phases[phase]->thread_work_items()->sum(); +} + +double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) { + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); + return _gc_par_phases[phase]->thread_work_items()->average(); +} + +size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) { + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); + return _gc_par_phases[phase]->thread_work_items()->minimum(); +} + +size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) { + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); + return _gc_par_phases[phase]->thread_work_items()->maximum(); +} + +class G1GCParPhasePrinter : public StackObj { + G1GCPhaseTimes* _phase_times; + public: + G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {} + + void print(G1GCPhaseTimes::GCParPhases phase_id) { + WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id]; + + if (phase->_log_level > G1Log::level() || !phase->_enabled) { + return; + } + + if (phase->_length == 1) { + print_single_length(phase_id, phase); + } else { + print_multi_length(phase_id, phase); + } + } + + private: + + void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) { + // No need for min, max, average and sum for only one worker + LineBuffer buf(phase->_indent_level); + buf.append_and_print_cr("[%s: %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0)); + + if (phase->_thread_work_items != NULL) { + LineBuffer buf2(phase->_thread_work_items->_indent_level); + buf2.append_and_print_cr("[%s: "SIZE_FORMAT"]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id)); + } } - if (G1CollectedHeap::use_parallel_gc_threads()) { - print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads); - _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)"); - _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)"); - if (_last_satb_filtering_times_ms.sum() > 0.0) { - _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)"); + + void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) { + for (uint i = 0; i < phase->_length; ++i) { + buf.append(" %.1lf", _phase_times->get_time_ms(phase_id, i)); + } + buf.print_cr(); + } + + void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) { + for (uint i = 0; i < thread_work_items->_length; ++i) { + buf.append(" " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i)); + } + buf.print_cr(); + } + + void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) { + LineBuffer buf(thread_work_items->_indent_level); + buf.append("[%s:", thread_work_items->_title); + + if (G1Log::finest()) { + print_count_values(buf, phase_id, thread_work_items); } - _last_update_rs_times_ms.print(2, "Update RS (ms)"); - _last_update_rs_processed_buffers.print(3, "Processed Buffers"); - _last_scan_rs_times_ms.print(2, "Scan RS (ms)"); - _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)"); - _last_obj_copy_times_ms.print(2, "Object Copy (ms)"); - _last_termination_times_ms.print(2, "Termination (ms)"); + + assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title)); + + buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]", + _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id), + _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id)); + } + + void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) { + LineBuffer buf(phase->_indent_level); + buf.append("[%s:", phase->_title); + if (G1Log::finest()) { - _last_termination_attempts.print(3, "Termination Attempts"); + print_time_values(buf, phase_id, phase); } - _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)"); - _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)"); - _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)"); - } else { - _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)"); - if (_last_satb_filtering_times_ms.sum() > 0.0) { - _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)"); + + buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf", + _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id), + _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id)); + + if (phase->_print_sum) { + // for things like the start and end times the sum is not + // that relevant + buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id)); + } + + buf.append_and_print_cr("]"); + + if (phase->_thread_work_items != NULL) { + print_thread_work_items(phase_id, phase->_thread_work_items); } - _last_update_rs_times_ms.print(1, "Update RS (ms)"); - _last_update_rs_processed_buffers.print(2, "Processed Buffers"); - _last_scan_rs_times_ms.print(1, "Scan RS (ms)"); - _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)"); - _last_obj_copy_times_ms.print(1, "Object Copy (ms)"); } +}; + +void G1GCPhaseTimes::print(double pause_time_sec) { + G1GCParPhasePrinter par_phase_printer(this); + + if (_root_region_scan_wait_time_ms > 0.0) { + print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms); + } + + print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads); + for (int i = 0; i <= GCMainParPhasesLast; i++) { + par_phase_printer.print((GCParPhases) i); + } + print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms); print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms); if (G1StringDedup::is_enabled()) { print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads); - _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)"); - _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)"); + for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) { + par_phase_printer.print((GCParPhases) i); + } } print_stats(1, "Clear CT", _cur_clear_ct_time_ms); double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms(); @@ -350,15 +551,16 @@ void G1GCPhaseTimes::print(double pause_time_sec) { print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms); - if (G1Log::finest()) { - _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty"); - _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards"); - } - if (G1ReclaimDeadHumongousObjectsAtYoungGC) { - print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms); + par_phase_printer.print(RedirtyCards); + + if (G1EagerReclaimHumongousObjects) { + print_stats(2, "Humongous Register", _cur_fast_reclaim_humongous_register_time_ms); if (G1Log::finest()) { print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total); print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates); + } + print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms); + if (G1Log::finest()) { print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed); } } @@ -373,3 +575,17 @@ void G1GCPhaseTimes::print(double pause_time_sec) { print_stats(2, "Verify After", _cur_verify_after_time_ms); } } + +G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) : + _phase_times(phase_times), _phase(phase), _worker_id(worker_id) { + if (_phase_times != NULL) { + _start_time = os::elapsedTime(); + } +} + +G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() { + if (_phase_times != NULL) { + _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time); + } +} + diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp index 8421eb07b..83e7235c4 100644 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,106 +26,60 @@ #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP #include "memory/allocation.hpp" -#include "gc_interface/gcCause.hpp" -template <class T> -class WorkerDataArray : public CHeapObj<mtGC> { - T* _data; - uint _length; - const char* _print_format; - bool _print_sum; +class LineBuffer; - NOT_PRODUCT(static const T _uninitialized;) - - // We are caching the sum and average to only have to calculate them once. - // This is not done in an MT-safe way. It is intended to allow single - // threaded code to call sum() and average() multiple times in any order - // without having to worry about the cost. - bool _has_new_data; - T _sum; - double _average; - - public: - WorkerDataArray(uint length, const char* print_format, bool print_sum = true) : - _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) { - assert(length > 0, "Must have some workers to store data for"); - _data = NEW_C_HEAP_ARRAY(T, _length, mtGC); - } - - ~WorkerDataArray() { - FREE_C_HEAP_ARRAY(T, _data, mtGC); - } - - void set(uint worker_i, T value) { - assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); - assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i)); - _data[worker_i] = value; - _has_new_data = true; - } - - T get(uint worker_i) { - assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); - assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i)); - return _data[worker_i]; - } - - void add(uint worker_i, T value) { - assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); - assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i)); - _data[worker_i] += value; - _has_new_data = true; - } - - double average(){ - if (_has_new_data) { - calculate_totals(); - } - return _average; - } - - T sum() { - if (_has_new_data) { - calculate_totals(); - } - return _sum; - } - - void print(int level, const char* title); - - void reset() PRODUCT_RETURN; - void verify() PRODUCT_RETURN; - - private: - - void calculate_totals(){ - _sum = (T)0; - for (uint i = 0; i < _length; ++i) { - _sum += _data[i]; - } - _average = (double)_sum / (double)_length; - _has_new_data = false; - } -}; +template <class T> class WorkerDataArray; class G1GCPhaseTimes : public CHeapObj<mtGC> { + friend class G1GCParPhasePrinter; - private: uint _active_gc_threads; uint _max_gc_threads; - WorkerDataArray<double> _last_gc_worker_start_times_ms; - WorkerDataArray<double> _last_ext_root_scan_times_ms; - WorkerDataArray<double> _last_satb_filtering_times_ms; - WorkerDataArray<double> _last_update_rs_times_ms; - WorkerDataArray<int> _last_update_rs_processed_buffers; - WorkerDataArray<double> _last_scan_rs_times_ms; - WorkerDataArray<double> _last_strong_code_root_scan_times_ms; - WorkerDataArray<double> _last_obj_copy_times_ms; - WorkerDataArray<double> _last_termination_times_ms; - WorkerDataArray<size_t> _last_termination_attempts; - WorkerDataArray<double> _last_gc_worker_end_times_ms; - WorkerDataArray<double> _last_gc_worker_times_ms; - WorkerDataArray<double> _last_gc_worker_other_times_ms; + public: + enum GCParPhases { + GCWorkerStart, + ExtRootScan, + ThreadRoots, + StringTableRoots, + UniverseRoots, + JNIRoots, + ObjectSynchronizerRoots, + FlatProfilerRoots, + ManagementRoots, + SystemDictionaryRoots, + CLDGRoots, + JVMTIRoots, + CodeCacheRoots, + CMRefRoots, + WaitForStrongCLD, + WeakCLDRoots, + SATBFiltering, + UpdateRS, + ScanRS, + CodeRoots, + ObjCopy, + Termination, + Other, + GCWorkerTotal, + GCWorkerEnd, + StringDedupQueueFixup, + StringDedupTableFixup, + RedirtyCards, + GCParPhasesSentinel + }; + + private: + // Markers for grouping the phases in the GCPhases enum above + static const int GCMainParPhasesLast = GCWorkerEnd; + static const int StringDedupPhasesFirst = StringDedupQueueFixup; + static const int StringDedupPhasesLast = StringDedupTableFixup; + + WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel]; + WorkerDataArray<size_t>* _update_rs_processed_buffers; + WorkerDataArray<size_t>* _termination_attempts; + WorkerDataArray<size_t>* _redirtied_cards; double _cur_collection_par_time_ms; double _cur_collection_code_root_fixup_time_ms; @@ -135,9 +89,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { double _cur_evac_fail_restore_remsets; double _cur_evac_fail_remove_self_forwards; - double _cur_string_dedup_fixup_time_ms; - WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms; - WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms; + double _cur_string_dedup_fixup_time_ms; double _cur_clear_ct_time_ms; double _cur_ref_proc_time_ms; @@ -149,14 +101,13 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { double _recorded_young_cset_choice_time_ms; double _recorded_non_young_cset_choice_time_ms; - WorkerDataArray<double> _last_redirty_logged_cards_time_ms; - WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards; double _recorded_redirty_logged_cards_time_ms; double _recorded_young_free_cset_time_ms; double _recorded_non_young_free_cset_time_ms; double _cur_fast_reclaim_humongous_time_ms; + double _cur_fast_reclaim_humongous_register_time_ms; size_t _cur_fast_reclaim_humongous_total; size_t _cur_fast_reclaim_humongous_candidates; size_t _cur_fast_reclaim_humongous_reclaimed; @@ -171,54 +122,34 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { public: G1GCPhaseTimes(uint max_gc_threads); - void note_gc_start(uint active_gc_threads); + void note_gc_start(uint active_gc_threads, bool mark_in_progress); void note_gc_end(); void print(double pause_time_sec); - void record_gc_worker_start_time(uint worker_i, double ms) { - _last_gc_worker_start_times_ms.set(worker_i, ms); - } - - void record_ext_root_scan_time(uint worker_i, double ms) { - _last_ext_root_scan_times_ms.set(worker_i, ms); - } - - void record_satb_filtering_time(uint worker_i, double ms) { - _last_satb_filtering_times_ms.set(worker_i, ms); - } - - void record_update_rs_time(uint worker_i, double ms) { - _last_update_rs_times_ms.set(worker_i, ms); - } - - void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) { - _last_update_rs_processed_buffers.set(worker_i, processed_buffers); - } + // record the time a phase took in seconds + void record_time_secs(GCParPhases phase, uint worker_i, double secs); - void record_scan_rs_time(uint worker_i, double ms) { - _last_scan_rs_times_ms.set(worker_i, ms); - } + // add a number of seconds to a phase + void add_time_secs(GCParPhases phase, uint worker_i, double secs); - void record_strong_code_root_scan_time(uint worker_i, double ms) { - _last_strong_code_root_scan_times_ms.set(worker_i, ms); - } + void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count); - void record_obj_copy_time(uint worker_i, double ms) { - _last_obj_copy_times_ms.set(worker_i, ms); - } + // return the average time for a phase in milliseconds + double average_time_ms(GCParPhases phase); - void add_obj_copy_time(uint worker_i, double ms) { - _last_obj_copy_times_ms.add(worker_i, ms); - } + size_t sum_thread_work_items(GCParPhases phase); - void record_termination(uint worker_i, double ms, size_t attempts) { - _last_termination_times_ms.set(worker_i, ms); - _last_termination_attempts.set(worker_i, attempts); - } + private: + double get_time_ms(GCParPhases phase, uint worker_i); + double sum_time_ms(GCParPhases phase); + double min_time_ms(GCParPhases phase); + double max_time_ms(GCParPhases phase); + size_t get_thread_work_item(GCParPhases phase, uint worker_i); + double average_thread_work_items(GCParPhases phase); + size_t min_thread_work_items(GCParPhases phase); + size_t max_thread_work_items(GCParPhases phase); - void record_gc_worker_end_time(uint worker_i, double ms) { - _last_gc_worker_end_times_ms.set(worker_i, ms); - } + public: void record_clear_ct_time(double ms) { _cur_clear_ct_time_ms = ms; @@ -248,21 +179,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { _cur_evac_fail_remove_self_forwards = ms; } - void note_string_dedup_fixup_start(); - void note_string_dedup_fixup_end(); - void record_string_dedup_fixup_time(double ms) { _cur_string_dedup_fixup_time_ms = ms; } - void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) { - _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms); - } - - void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) { - _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms); - } - void record_ref_proc_time(double ms) { _cur_ref_proc_time_ms = ms; } @@ -283,7 +203,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { _recorded_non_young_free_cset_time_ms = time_ms; } - void record_fast_reclaim_humongous_stats(size_t total, size_t candidates) { + void record_fast_reclaim_humongous_stats(double time_ms, size_t total, size_t candidates) { + _cur_fast_reclaim_humongous_register_time_ms = time_ms; _cur_fast_reclaim_humongous_total = total; _cur_fast_reclaim_humongous_candidates = candidates; } @@ -301,14 +222,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { _recorded_non_young_cset_choice_time_ms = time_ms; } - void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) { - _last_redirty_logged_cards_time_ms.set(worker_i, time_ms); - } - - void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) { - _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers); - } - void record_redirty_logged_cards_time_ms(double time_ms) { _recorded_redirty_logged_cards_time_ms = time_ms; } @@ -362,38 +275,16 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { double fast_reclaim_humongous_time_ms() { return _cur_fast_reclaim_humongous_time_ms; } +}; - double average_last_update_rs_time() { - return _last_update_rs_times_ms.average(); - } - - int sum_last_update_rs_processed_buffers() { - return _last_update_rs_processed_buffers.sum(); - } - - double average_last_scan_rs_time(){ - return _last_scan_rs_times_ms.average(); - } - - double average_last_strong_code_root_scan_time(){ - return _last_strong_code_root_scan_times_ms.average(); - } - - double average_last_obj_copy_time() { - return _last_obj_copy_times_ms.average(); - } - - double average_last_termination_time() { - return _last_termination_times_ms.average(); - } - - double average_last_ext_root_scan_time() { - return _last_ext_root_scan_times_ms.average(); - } - - double average_last_satb_filtering_times_ms() { - return _last_satb_filtering_times_ms.average(); - } +class G1GCParPhaseTimesTracker : public StackObj { + double _start_time; + G1GCPhaseTimes::GCParPhases _phase; + G1GCPhaseTimes* _phase_times; + uint _worker_id; +public: + G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id); + ~G1GCParPhaseTimesTracker(); }; #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp index 0c51c318f..f1f807c67 100644 --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,16 +36,13 @@ void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) { if (default_use_cache()) { _use_cache = true; - _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize; _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); - _n_hot = 0; - _hot_cache_idx = 0; + reset_hot_cache_internal(); // For refining the cards in the hot cache in parallel - uint n_workers = (ParallelGCThreads > 0 ? - _g1h->workers()->total_workers() : 1); - _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / (int)n_workers); + _hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size); _hot_cache_par_claimed_idx = 0; _card_counts.initialize(card_counts_storage); @@ -66,26 +63,21 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) { // return it for immediate refining. return card_ptr; } - // Otherwise, the card is hot. - jbyte* res = NULL; - MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); - if (_n_hot == _hot_cache_size) { - res = _hot_cache[_hot_cache_idx]; - _n_hot--; - } - - // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. - _hot_cache[_hot_cache_idx] = card_ptr; - _hot_cache_idx++; - - if (_hot_cache_idx == _hot_cache_size) { - // Wrap around - _hot_cache_idx = 0; - } - _n_hot++; - - return res; + size_t index = Atomic::add_ptr((intptr_t)1, (volatile intptr_t*)&_hot_cache_idx) - 1; + size_t masked_index = index & (_hot_cache_size - 1); + jbyte* current_ptr = _hot_cache[masked_index]; + + // Try to store the new card pointer into the cache. Compare-and-swap to guard + // against the unlikely event of a race resulting in another card pointer to + // have already been written to the cache. In this case we will return + // card_ptr in favor of the other option, which would be starting over. This + // should be OK since card_ptr will likely be the older card already when/if + // this ever happens. + jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr, + &_hot_cache[masked_index], + current_ptr); + return (previous_ptr == current_ptr) ? previous_ptr : card_ptr; } void G1HotCardCache::drain(uint worker_i, @@ -98,38 +90,37 @@ void G1HotCardCache::drain(uint worker_i, assert(_hot_cache != NULL, "Logic"); assert(!use_cache(), "cache should be disabled"); - int start_idx; - - while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once - int end_idx = start_idx + _hot_cache_par_chunk_size; - - if (start_idx == - Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { - // The current worker has successfully claimed the chunk [start_idx..end_idx) - end_idx = MIN2(end_idx, _n_hot); - for (int i = start_idx; i < end_idx; i++) { - jbyte* card_ptr = _hot_cache[i]; - if (card_ptr != NULL) { - if (g1rs->refine_card(card_ptr, worker_i, true)) { - // The part of the heap spanned by the card contains references - // that point into the current collection set. - // We need to record the card pointer in the DirtyCardQueueSet - // that we use for such cards. - // - // The only time we care about recording cards that contain - // references that point into the collection set is during - // RSet updating while within an evacuation pause. - // In this case worker_i should be the id of a GC worker thread - assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); - assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), - err_msg("incorrect worker id: "UINT32_FORMAT, worker_i)); - - into_cset_dcq->enqueue(card_ptr); - } + while (_hot_cache_par_claimed_idx < _hot_cache_size) { + size_t end_idx = Atomic::add_ptr((intptr_t)_hot_cache_par_chunk_size, + (volatile intptr_t*)&_hot_cache_par_claimed_idx); + size_t start_idx = end_idx - _hot_cache_par_chunk_size; + // The current worker has successfully claimed the chunk [start_idx..end_idx) + end_idx = MIN2(end_idx, _hot_cache_size); + for (size_t i = start_idx; i < end_idx; i++) { + jbyte* card_ptr = _hot_cache[i]; + if (card_ptr != NULL) { + if (g1rs->refine_card(card_ptr, worker_i, true)) { + // The part of the heap spanned by the card contains references + // that point into the current collection set. + // We need to record the card pointer in the DirtyCardQueueSet + // that we use for such cards. + // + // The only time we care about recording cards that contain + // references that point into the collection set is during + // RSet updating while within an evacuation pause. + // In this case worker_i should be the id of a GC worker thread + assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); + assert(worker_i < ParallelGCThreads, + err_msg("incorrect worker id: %u", worker_i)); + + into_cset_dcq->enqueue(card_ptr); } + } else { + break; } } } + // The existing entries in the hot card cache, which were just refined // above, are discarded prior to re-enabling the cache near the end of the GC. } diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp index becd68327..b065e36ce 100644 --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -54,21 +54,33 @@ class HeapRegion; // code, increasing throughput. class G1HotCardCache: public CHeapObj<mtGC> { - G1CollectedHeap* _g1h; + + G1CollectedHeap* _g1h; + + bool _use_cache; + + G1CardCounts _card_counts; // The card cache table - jbyte** _hot_cache; + jbyte** _hot_cache; + + size_t _hot_cache_size; + + int _hot_cache_par_chunk_size; - int _hot_cache_size; - int _n_hot; - int _hot_cache_idx; + // Avoids false sharing when concurrently updating _hot_cache_idx or + // _hot_cache_par_claimed_idx. These are never updated at the same time + // thus it's not necessary to separate them as well + char _pad_before[DEFAULT_CACHE_LINE_SIZE]; - int _hot_cache_par_chunk_size; - volatile int _hot_cache_par_claimed_idx; + volatile size_t _hot_cache_idx; - bool _use_cache; + volatile size_t _hot_cache_par_claimed_idx; - G1CardCounts _card_counts; + char _pad_after[DEFAULT_CACHE_LINE_SIZE]; + + // The number of cached cards a thread claims when flushing the cache + static const int ClaimChunkSize = 32; bool default_use_cache() const { return (G1ConcRSLogCacheSize > 0); @@ -110,16 +122,25 @@ class G1HotCardCache: public CHeapObj<mtGC> { void reset_hot_cache() { assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); - _hot_cache_idx = 0; _n_hot = 0; + if (default_use_cache()) { + reset_hot_cache_internal(); + } } - bool hot_cache_is_empty() { return _n_hot == 0; } - // Zeros the values in the card counts table for entire committed heap void reset_card_counts(); // Zeros the values in the card counts table for the given region void reset_card_counts(HeapRegion* hr); + + private: + void reset_hot_cache_internal() { + assert(_hot_cache != NULL, "Logic"); + _hot_cache_idx = 0; + for (size_t i = 0; i < _hot_cache_size; i++) { + _hot_cache[i] = NULL; + } + } }; #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP diff --git a/src/share/vm/gc_implementation/g1/g1InCSetState.hpp b/src/share/vm/gc_implementation/g1/g1InCSetState.hpp new file mode 100644 index 000000000..f13eaa0ae --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1InCSetState.hpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP + +#include "gc_implementation/g1/g1BiasedArray.hpp" +#include "memory/allocation.hpp" + +// Per-region state during garbage collection. +struct InCSetState { + public: + // We use different types to represent the state value. Particularly SPARC puts + // values in structs from "left to right", i.e. MSB to LSB. This results in many + // unnecessary shift operations when loading and storing values of this type. + // This degrades performance significantly (>10%) on that platform. + // Other tested ABIs do not seem to have this problem, and actually tend to + // favor smaller types, so we use the smallest usable type there. +#ifdef SPARC + #define CSETSTATE_FORMAT INTPTR_FORMAT + typedef intptr_t in_cset_state_t; +#else + #define CSETSTATE_FORMAT "%d" + typedef int8_t in_cset_state_t; +#endif + private: + in_cset_state_t _value; + public: + enum { + // Selection of the values were driven to micro-optimize the encoding and + // frequency of the checks. + // The most common check is whether the region is in the collection set or not. + // This encoding allows us to use an != 0 check which in some architectures + // (x86*) can be encoded slightly more efficently than a normal comparison + // against zero. + // The same situation occurs when checking whether the region is humongous + // or not, which is encoded by values < 0. + // The other values are simply encoded in increasing generation order, which + // makes getting the next generation fast by a simple increment. + Humongous = -1, // The region is humongous - note that actually any value < 0 would be possible here. + NotInCSet = 0, // The region is not in the collection set. + Young = 1, // The region is in the collection set and a young region. + Old = 2, // The region is in the collection set and an old region. + Num + }; + + InCSetState(in_cset_state_t value = NotInCSet) : _value(value) { + assert(is_valid(), err_msg("Invalid state %d", _value)); + } + + in_cset_state_t value() const { return _value; } + + void set_old() { _value = Old; } + + bool is_in_cset_or_humongous() const { return _value != NotInCSet; } + bool is_in_cset() const { return _value > NotInCSet; } + bool is_humongous() const { return _value < NotInCSet; } + bool is_young() const { return _value == Young; } + bool is_old() const { return _value == Old; } + +#ifdef ASSERT + bool is_default() const { return !is_in_cset_or_humongous(); } + bool is_valid() const { return (_value >= Humongous) && (_value < Num); } + bool is_valid_gen() const { return (_value >= Young && _value <= Old); } +#endif +}; + +// Instances of this class are used for quick tests on whether a reference points +// into the collection set and into which generation or is a humongous object +// +// Each of the array's elements indicates whether the corresponding region is in +// the collection set and if so in which generation, or a humongous region. +// +// We use this to speed up reference processing during young collection and +// quickly reclaim humongous objects. For the latter, by making a humongous region +// succeed this test, we sort-of add it to the collection set. During the reference +// iteration closures, when we see a humongous region, we then simply mark it as +// referenced, i.e. live. +class G1InCSetStateFastTestBiasedMappedArray : public G1BiasedMappedArray<InCSetState> { + protected: + InCSetState default_value() const { return InCSetState::NotInCSet; } + public: + void set_humongous(uintptr_t index) { + assert(get_by_index(index).is_default(), + err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value())); + set_by_index(index, InCSetState::Humongous); + } + + void clear_humongous(uintptr_t index) { + set_by_index(index, InCSetState::NotInCSet); + } + + void set_in_young(uintptr_t index) { + assert(get_by_index(index).is_default(), + err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value())); + set_by_index(index, InCSetState::Young); + } + + void set_in_old(uintptr_t index) { + assert(get_by_index(index).is_default(), + err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value())); + set_by_index(index, InCSetState::Old); + } + + bool is_in_cset_or_humongous(HeapWord* addr) const { return at(addr).is_in_cset_or_humongous(); } + bool is_in_cset(HeapWord* addr) const { return at(addr).is_in_cset(); } + InCSetState at(HeapWord* addr) const { return get_by_address(addr); } + void clear() { G1BiasedMappedArray<InCSetState>::clear(); } +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP diff --git a/src/share/vm/gc_implementation/g1/g1Log.hpp b/src/share/vm/gc_implementation/g1/g1Log.hpp index b8da001cf..6f72c8fbc 100644 --- a/src/share/vm/gc_implementation/g1/g1Log.hpp +++ b/src/share/vm/gc_implementation/g1/g1Log.hpp @@ -28,6 +28,7 @@ #include "memory/allocation.hpp" class G1Log : public AllStatic { + public: typedef enum { LevelNone, LevelFine, @@ -35,6 +36,7 @@ class G1Log : public AllStatic { LevelFinest } LogLevel; + private: static LogLevel _level; public: @@ -50,6 +52,10 @@ class G1Log : public AllStatic { return _level == LevelFinest; } + static LogLevel level() { + return _level; + } + static void init(); }; diff --git a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp index 7e5d0873b..b1bc68275 100644 --- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp @@ -31,6 +31,7 @@ #include "code/icBuffer.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1MarkSweep.hpp" +#include "gc_implementation/g1/g1RootProcessor.hpp" #include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/shared/gcHeapSummary.hpp" #include "gc_implementation/shared/gcTimer.hpp" @@ -126,21 +127,22 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id()); GenMarkSweep::trace(" 1"); - SharedHeap* sh = SharedHeap::heap(); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); // Need cleared claim bits for the roots processing ClassLoaderDataGraph::clear_claimed_marks(); MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations); - sh->process_strong_roots(true, // activate StrongRootsScope - SharedHeap::SO_None, - &GenMarkSweep::follow_root_closure, - &GenMarkSweep::follow_cld_closure, - &follow_code_closure); + { + G1RootProcessor root_processor(g1h); + root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure, + &GenMarkSweep::follow_cld_closure, + &follow_code_closure); + } // Process reference objects found during marking ReferenceProcessor* rp = GenMarkSweep::ref_processor(); - assert(rp == G1CollectedHeap::heap()->ref_processor_stw(), "Sanity"); + assert(rp == g1h->ref_processor_stw(), "Sanity"); rp->setup_policy(clear_all_softrefs); const ReferenceProcessorStats& stats = @@ -226,6 +228,12 @@ class G1AdjustPointersClosure: public HeapRegionClosure { } }; +class G1AlwaysTrueClosure: public BoolObjectClosure { +public: + bool do_object_b(oop p) { return true; } +}; +static G1AlwaysTrueClosure always_true; + void G1MarkSweep::mark_sweep_phase3() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -233,24 +241,23 @@ void G1MarkSweep::mark_sweep_phase3() { GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id()); GenMarkSweep::trace("3"); - SharedHeap* sh = SharedHeap::heap(); - // Need cleared claim bits for the roots processing ClassLoaderDataGraph::clear_claimed_marks(); CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations); - sh->process_all_roots(true, // activate StrongRootsScope - SharedHeap::SO_AllCodeCache, - &GenMarkSweep::adjust_pointer_closure, - &GenMarkSweep::adjust_cld_closure, - &adjust_code_closure); + { + G1RootProcessor root_processor(g1h); + root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure, + &GenMarkSweep::adjust_cld_closure, + &adjust_code_closure); + } assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity"); g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure); // Now adjust pointers in remaining weak roots. (All of which should // have been cleared if they pointed to non-surviving objects.) - sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure); + JNIHandles::weak_oops_do(&always_true, &GenMarkSweep::adjust_pointer_closure); if (G1StringDedup::is_enabled()) { G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure); diff --git a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp index 79f8b52f6..4f6e655b5 100644 --- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp @@ -26,6 +26,7 @@ #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP #include "memory/iterator.hpp" +#include "oops/markOop.hpp" class HeapRegion; class G1CollectedHeap; @@ -239,14 +240,14 @@ class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure { G1CollectedHeap* _g1; G1RemSet* _g1_rem_set; HeapRegion* _from; - OopsInHeapRegionClosure* _push_ref_cl; + G1ParPushHeapRSClosure* _push_ref_cl; bool _record_refs_into_cset; uint _worker_i; public: G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, G1RemSet* rs, - OopsInHeapRegionClosure* push_ref_cl, + G1ParPushHeapRSClosure* push_ref_cl, bool record_refs_into_cset, uint worker_i = 0); @@ -256,7 +257,8 @@ public: } bool self_forwarded(oop obj) { - bool result = (obj->is_forwarded() && (obj->forwardee()== obj)); + markOop m = obj->mark(); + bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj)); return result; } diff --git a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp index bcf30b6ba..a00cd7a6c 100644 --- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp @@ -67,8 +67,8 @@ inline void G1ParScanClosure::do_oop_nv(T* p) { if (!oopDesc::is_null(heap_oop)) { oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj); - if (state == G1CollectedHeap::InCSet) { + const InCSetState state = _g1->in_cset_state(obj); + if (state.is_in_cset()) { // We're not going to even bother checking whether the object is // already forwarded or not, as this usually causes an immediate // stall. We'll try to prefetch the object (for write, given that @@ -87,7 +87,7 @@ inline void G1ParScanClosure::do_oop_nv(T* p) { _par_scan_state->push_on_queue(p); } else { - if (state == G1CollectedHeap::IsHumongous) { + if (state.is_humongous()) { _g1->set_humongous_is_live(obj); } _par_scan_state->update_rs(_from, p, _worker_id); diff --git a/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp b/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp index 662d28aee..1a22af82a 100644 --- a/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp +++ b/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -44,20 +44,29 @@ #endif #include "utilities/bitMap.inline.hpp" -G1PageBasedVirtualSpace::G1PageBasedVirtualSpace() : _low_boundary(NULL), - _high_boundary(NULL), _committed(), _page_size(0), _special(false), +G1PageBasedVirtualSpace::G1PageBasedVirtualSpace(ReservedSpace rs, size_t used_size, size_t page_size) : + _low_boundary(NULL), _high_boundary(NULL), _committed(), _page_size(0), _special(false), _dirty(), _executable(false) { + initialize_with_page_size(rs, used_size, page_size); } -bool G1PageBasedVirtualSpace::initialize_with_granularity(ReservedSpace rs, size_t page_size) { - if (!rs.is_reserved()) { - return false; // Allocation failed. - } +void G1PageBasedVirtualSpace::initialize_with_page_size(ReservedSpace rs, size_t used_size, size_t page_size) { + guarantee(rs.is_reserved(), "Given reserved space must have been reserved already."); + assert(_low_boundary == NULL, "VirtualSpace already initialized"); - assert(page_size > 0, "Granularity must be non-zero."); + assert(page_size > 0, "Page size must be non-zero."); + + guarantee(is_ptr_aligned(rs.base(), page_size), + err_msg("Reserved space base " PTR_FORMAT " is not aligned to requested page size " SIZE_FORMAT, p2i(rs.base()), page_size)); + guarantee(is_size_aligned(used_size, os::vm_page_size()), + err_msg("Given used reserved space size needs to be OS page size aligned (%d bytes) but is " SIZE_FORMAT, os::vm_page_size(), used_size)); + guarantee(used_size <= rs.size(), + err_msg("Used size of reserved space " SIZE_FORMAT " bytes is smaller than reservation at " SIZE_FORMAT " bytes", used_size, rs.size())); + guarantee(is_size_aligned(rs.size(), page_size), + err_msg("Expected that the virtual space is size aligned, but " SIZE_FORMAT " is not aligned to page size " SIZE_FORMAT, rs.size(), page_size)); _low_boundary = rs.base(); - _high_boundary = _low_boundary + rs.size(); + _high_boundary = _low_boundary + used_size; _special = rs.special(); _executable = rs.executable(); @@ -65,16 +74,15 @@ bool G1PageBasedVirtualSpace::initialize_with_granularity(ReservedSpace rs, size _page_size = page_size; assert(_committed.size() == 0, "virtual space initialized more than once"); - uintx size_in_bits = rs.size() / page_size; - _committed.resize(size_in_bits, /* in_resource_area */ false); + BitMap::idx_t size_in_pages = rs.size() / page_size; + _committed.resize(size_in_pages, /* in_resource_area */ false); if (_special) { - _dirty.resize(size_in_bits, /* in_resource_area */ false); + _dirty.resize(size_in_pages, /* in_resource_area */ false); } - return true; + _tail_size = used_size % _page_size; } - G1PageBasedVirtualSpace::~G1PageBasedVirtualSpace() { release(); } @@ -87,12 +95,18 @@ void G1PageBasedVirtualSpace::release() { _special = false; _executable = false; _page_size = 0; + _tail_size = 0; _committed.resize(0, false); _dirty.resize(0, false); } size_t G1PageBasedVirtualSpace::committed_size() const { - return _committed.count_one_bits() * _page_size; + size_t result = _committed.count_one_bits() * _page_size; + // The last page might not be in full. + if (is_last_page_partial() && _committed.at(_committed.size() - 1)) { + result -= _page_size - _tail_size; + } + return result; } size_t G1PageBasedVirtualSpace::reserved_size() const { @@ -103,62 +117,134 @@ size_t G1PageBasedVirtualSpace::uncommitted_size() const { return reserved_size() - committed_size(); } -uintptr_t G1PageBasedVirtualSpace::addr_to_page_index(char* addr) const { +size_t G1PageBasedVirtualSpace::addr_to_page_index(char* addr) const { return (addr - _low_boundary) / _page_size; } -bool G1PageBasedVirtualSpace::is_area_committed(uintptr_t start, size_t size_in_pages) const { - uintptr_t end = start + size_in_pages; - return _committed.get_next_zero_offset(start, end) >= end; +bool G1PageBasedVirtualSpace::is_area_committed(size_t start_page, size_t size_in_pages) const { + size_t end_page = start_page + size_in_pages; + return _committed.get_next_zero_offset(start_page, end_page) >= end_page; } -bool G1PageBasedVirtualSpace::is_area_uncommitted(uintptr_t start, size_t size_in_pages) const { - uintptr_t end = start + size_in_pages; - return _committed.get_next_one_offset(start, end) >= end; +bool G1PageBasedVirtualSpace::is_area_uncommitted(size_t start_page, size_t size_in_pages) const { + size_t end_page = start_page + size_in_pages; + return _committed.get_next_one_offset(start_page, end_page) >= end_page; } -char* G1PageBasedVirtualSpace::page_start(uintptr_t index) { +char* G1PageBasedVirtualSpace::page_start(size_t index) const { return _low_boundary + index * _page_size; } -size_t G1PageBasedVirtualSpace::byte_size_for_pages(size_t num) { - return num * _page_size; +bool G1PageBasedVirtualSpace::is_after_last_page(size_t index) const { + guarantee(index <= _committed.size(), + err_msg("Given boundary page " SIZE_FORMAT " is beyond managed page count " SIZE_FORMAT, index, _committed.size())); + return index == _committed.size(); +} + +void G1PageBasedVirtualSpace::commit_preferred_pages(size_t start, size_t num_pages) { + assert(num_pages > 0, "No full pages to commit"); + assert(start + num_pages <= _committed.size(), + err_msg("Tried to commit area from page " SIZE_FORMAT " to page " SIZE_FORMAT " " + "that is outside of managed space of " SIZE_FORMAT " pages", + start, start + num_pages, _committed.size())); + + char* start_addr = page_start(start); + size_t size = num_pages * _page_size; + + os::commit_memory_or_exit(start_addr, size, _page_size, _executable, + err_msg("Failed to commit area from " PTR_FORMAT " to " PTR_FORMAT " of length " SIZE_FORMAT ".", + p2i(start_addr), p2i(start_addr + size), size)); +} + +void G1PageBasedVirtualSpace::commit_tail() { + assert(_tail_size > 0, "The size of the tail area must be > 0 when reaching here"); + + char* const aligned_end_address = (char*)align_ptr_down(_high_boundary, _page_size); + os::commit_memory_or_exit(aligned_end_address, _tail_size, os::vm_page_size(), _executable, + err_msg("Failed to commit tail area from " PTR_FORMAT " to " PTR_FORMAT " of length " SIZE_FORMAT ".", + p2i(aligned_end_address), p2i(_high_boundary), _tail_size)); +} + +void G1PageBasedVirtualSpace::commit_internal(size_t start_page, size_t end_page) { + guarantee(start_page < end_page, + err_msg("Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page)); + guarantee(end_page <= _committed.size(), + err_msg("Given end page " SIZE_FORMAT " is beyond end of managed page amount of " SIZE_FORMAT, end_page, _committed.size())); + + size_t pages = end_page - start_page; + bool need_to_commit_tail = is_after_last_page(end_page) && is_last_page_partial(); + + // If we have to commit some (partial) tail area, decrease the amount of pages to avoid + // committing that in the full-page commit code. + if (need_to_commit_tail) { + pages--; + } + + if (pages > 0) { + commit_preferred_pages(start_page, pages); + } + + if (need_to_commit_tail) { + commit_tail(); + } +} + +char* G1PageBasedVirtualSpace::bounded_end_addr(size_t end_page) const { + return MIN2(_high_boundary, page_start(end_page)); } -bool G1PageBasedVirtualSpace::commit(uintptr_t start, size_t size_in_pages) { +void G1PageBasedVirtualSpace::pretouch_internal(size_t start_page, size_t end_page) { + guarantee(start_page < end_page, + err_msg("Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page)); + + os::pretouch_memory(page_start(start_page), bounded_end_addr(end_page)); +} + +bool G1PageBasedVirtualSpace::commit(size_t start_page, size_t size_in_pages) { // We need to make sure to commit all pages covered by the given area. - guarantee(is_area_uncommitted(start, size_in_pages), "Specified area is not uncommitted"); + guarantee(is_area_uncommitted(start_page, size_in_pages), "Specified area is not uncommitted"); bool zero_filled = true; - uintptr_t end = start + size_in_pages; + size_t end_page = start_page + size_in_pages; if (_special) { // Check for dirty pages and update zero_filled if any found. - if (_dirty.get_next_one_offset(start,end) < end) { + if (_dirty.get_next_one_offset(start_page, end_page) < end_page) { zero_filled = false; - _dirty.clear_range(start, end); + _dirty.clear_range(start_page, end_page); } } else { - os::commit_memory_or_exit(page_start(start), byte_size_for_pages(size_in_pages), _executable, - err_msg("Failed to commit pages from "SIZE_FORMAT" of length "SIZE_FORMAT, start, size_in_pages)); + commit_internal(start_page, end_page); } - _committed.set_range(start, end); + _committed.set_range(start_page, end_page); + if (AlwaysPreTouch) { + pretouch_internal(start_page, end_page); + } return zero_filled; } -void G1PageBasedVirtualSpace::uncommit(uintptr_t start, size_t size_in_pages) { - guarantee(is_area_committed(start, size_in_pages), "checking"); +void G1PageBasedVirtualSpace::uncommit_internal(size_t start_page, size_t end_page) { + guarantee(start_page < end_page, + err_msg("Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page)); + + char* start_addr = page_start(start_page); + os::uncommit_memory(start_addr, pointer_delta(bounded_end_addr(end_page), start_addr, sizeof(char))); +} + +void G1PageBasedVirtualSpace::uncommit(size_t start_page, size_t size_in_pages) { + guarantee(is_area_committed(start_page, size_in_pages), "checking"); + size_t end_page = start_page + size_in_pages; if (_special) { // Mark that memory is dirty. If committed again the memory might // need to be cleared explicitly. - _dirty.set_range(start, start + size_in_pages); + _dirty.set_range(start_page, end_page); } else { - os::uncommit_memory(page_start(start), byte_size_for_pages(size_in_pages)); + uncommit_internal(start_page, end_page); } - _committed.clear_range(start, start + size_in_pages); + _committed.clear_range(start_page, end_page); } bool G1PageBasedVirtualSpace::contains(const void* p) const { @@ -172,7 +258,8 @@ void G1PageBasedVirtualSpace::print_on(outputStream* out) { out->cr(); out->print_cr(" - committed: " SIZE_FORMAT, committed_size()); out->print_cr(" - reserved: " SIZE_FORMAT, reserved_size()); - out->print_cr(" - [low_b, high_b]: [" INTPTR_FORMAT ", " INTPTR_FORMAT "]", p2i(_low_boundary), p2i(_high_boundary)); + out->print_cr(" - preferred page size: " SIZE_FORMAT, _page_size); + out->print_cr(" - [low_b, high_b]: [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(_low_boundary), p2i(_high_boundary)); } void G1PageBasedVirtualSpace::print() { diff --git a/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp b/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp index fb2c78415..4d0b7b21b 100644 --- a/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp +++ b/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp @@ -34,6 +34,12 @@ // granularity. // (De-)Allocation requests are always OS page aligned by passing a page index // and multiples of pages. +// For systems that only commits of memory in a given size (always greater than +// page size) the base address is required to be aligned to that page size. +// The actual size requested need not be aligned to that page size, but the size +// of the reservation passed may be rounded up to this page size. Any fragment +// (less than the page size) of the actual size at the tail of the request will +// be committed using OS small pages. // The implementation gives an error when trying to commit or uncommit pages that // have already been committed or uncommitted. class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC { @@ -43,7 +49,11 @@ class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC { char* _low_boundary; char* _high_boundary; - // The commit/uncommit granularity in bytes. + // The size of the tail in bytes of the handled space that needs to be committed + // using small pages. + size_t _tail_size; + + // The preferred page size used for commit/uncommit in bytes. size_t _page_size; // Bitmap used for verification of commit/uncommit operations. @@ -62,30 +72,55 @@ class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC { // Indicates whether the committed space should be executable. bool _executable; + // Helper function for committing memory. Commit the given memory range by using + // _page_size pages as much as possible and the remainder with small sized pages. + void commit_internal(size_t start_page, size_t end_page); + // Commit num_pages pages of _page_size size starting from start. All argument + // checking has been performed. + void commit_preferred_pages(size_t start_page, size_t end_page); + // Commit space at the high end of the space that needs to be committed with small + // sized pages. + void commit_tail(); + + // Uncommit the given memory range. + void uncommit_internal(size_t start_page, size_t end_page); + + // Pretouch the given memory range. + void pretouch_internal(size_t start_page, size_t end_page); + // Returns the index of the page which contains the given address. uintptr_t addr_to_page_index(char* addr) const; // Returns the address of the given page index. - char* page_start(uintptr_t index); - // Returns the byte size of the given number of pages. - size_t byte_size_for_pages(size_t num); + char* page_start(size_t index) const; + + // Is the given page index the last page? + bool is_last_page(size_t index) const { return index == (_committed.size() - 1); } + // Is the given page index the first after last page? + bool is_after_last_page(size_t index) const; + // Is the last page only partially covered by this space? + bool is_last_page_partial() const { return !is_ptr_aligned(_high_boundary, _page_size); } + // Returns the end address of the given page bounded by the reserved space. + char* bounded_end_addr(size_t end_page) const; // Returns true if the entire area is backed by committed memory. - bool is_area_committed(uintptr_t start, size_t size_in_pages) const; + bool is_area_committed(size_t start_page, size_t size_in_pages) const; // Returns true if the entire area is not backed by committed memory. - bool is_area_uncommitted(uintptr_t start, size_t size_in_pages) const; + bool is_area_uncommitted(size_t start_page, size_t size_in_pages) const; + void initialize_with_page_size(ReservedSpace rs, size_t used_size, size_t page_size); public: // Commit the given area of pages starting at start being size_in_pages large. // Returns true if the given area is zero filled upon completion. - bool commit(uintptr_t start, size_t size_in_pages); + bool commit(size_t start_page, size_t size_in_pages); // Uncommit the given area of pages starting at start being size_in_pages large. - void uncommit(uintptr_t start, size_t size_in_pages); + void uncommit(size_t start_page, size_t size_in_pages); - // Initialization - G1PageBasedVirtualSpace(); - bool initialize_with_granularity(ReservedSpace rs, size_t page_size); + // Initialize the given reserved space with the given base address and the size + // actually used. + // Prefer to commit in page_size chunks. + G1PageBasedVirtualSpace(ReservedSpace rs, size_t used_size, size_t page_size); // Destruction ~G1PageBasedVirtualSpace(); diff --git a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp index 311b9d008..406ac73d1 100644 --- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp +++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp @@ -38,6 +38,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, _g1_rem(g1h->g1_rem_set()), _hash_seed(17), _queue_num(queue_num), _term_attempts(0), + _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()), _age_table(false), _scanner(g1h, rp), _strong_roots_time(0), _term_time(0) { _scanner.set_par_scan_thread_state(this); @@ -59,6 +60,12 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, _g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h); + _dest[InCSetState::NotInCSet] = InCSetState::NotInCSet; + // The dest for Young is used when the objects are aged enough to + // need to be moved to the next space. + _dest[InCSetState::Young] = InCSetState::Old; + _dest[InCSetState::Old] = InCSetState::Old; + _start = os::elapsedTime(); } @@ -150,86 +157,126 @@ void G1ParScanThreadState::trim_queue() { } while (!_refs->is_empty()); } -oop G1ParScanThreadState::copy_to_survivor_space(oop const old) { - size_t word_sz = old->size(); - HeapRegion* from_region = _g1h->heap_region_containing_raw(old); +HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state, + InCSetState* dest, + size_t word_sz, + AllocationContext_t const context) { + assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value())); + assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value())); + + // Right now we only have two types of regions (young / old) so + // let's keep the logic here simple. We can generalize it when necessary. + if (dest->is_young()) { + HeapWord* const obj_ptr = _g1_par_allocator->allocate(InCSetState::Old, + word_sz, context); + if (obj_ptr == NULL) { + return NULL; + } + // Make sure that we won't attempt to copy any other objects out + // of a survivor region (given that apparently we cannot allocate + // any new ones) to avoid coming into this slow path. + _tenuring_threshold = 0; + dest->set_old(); + return obj_ptr; + } else { + assert(dest->is_old(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value())); + // no other space to try. + return NULL; + } +} + +InCSetState G1ParScanThreadState::next_state(InCSetState const state, markOop const m, uint& age) { + if (state.is_young()) { + age = !m->has_displaced_mark_helper() ? m->age() + : m->displaced_mark_helper()->age(); + if (age < _tenuring_threshold) { + return state; + } + } + return dest(state); +} + +oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, + oop const old, + markOop const old_mark) { + const size_t word_sz = old->size(); + HeapRegion* const from_region = _g1h->heap_region_containing_raw(old); // +1 to make the -1 indexes valid... - int young_index = from_region->young_index_in_cset()+1; + const int young_index = from_region->young_index_in_cset()+1; assert( (from_region->is_young() && young_index > 0) || (!from_region->is_young() && young_index == 0), "invariant" ); - G1CollectorPolicy* g1p = _g1h->g1_policy(); - markOop m = old->mark(); - int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age() - : m->age(); - GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age, - word_sz); - AllocationContext_t context = from_region->allocation_context(); - HeapWord* obj_ptr = _g1_par_allocator->allocate(alloc_purpose, word_sz, context); -#ifndef PRODUCT - // Should this evacuation fail? - if (_g1h->evacuation_should_fail()) { - if (obj_ptr != NULL) { - _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context); - obj_ptr = NULL; + const AllocationContext_t context = from_region->allocation_context(); + + uint age = 0; + InCSetState dest_state = next_state(state, old_mark, age); + HeapWord* obj_ptr = _g1_par_allocator->plab_allocate(dest_state, word_sz, context); + + // PLAB allocations should succeed most of the time, so we'll + // normally check against NULL once and that's it. + if (obj_ptr == NULL) { + obj_ptr = _g1_par_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context); + if (obj_ptr == NULL) { + obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context); + if (obj_ptr == NULL) { + // This will either forward-to-self, or detect that someone else has + // installed a forwarding pointer. + return _g1h->handle_evacuation_failure_par(this, old); + } } } -#endif // !PRODUCT - if (obj_ptr == NULL) { - // This will either forward-to-self, or detect that someone else has - // installed a forwarding pointer. + assert(obj_ptr != NULL, "when we get here, allocation should have succeeded"); +#ifndef PRODUCT + // Should this evacuation fail? + if (_g1h->evacuation_should_fail()) { + // Doing this after all the allocation attempts also tests the + // undo_allocation() method too. + _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context); return _g1h->handle_evacuation_failure_par(this, old); } - - oop obj = oop(obj_ptr); +#endif // !PRODUCT // We're going to allocate linearly, so might as well prefetch ahead. Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes); - oop forward_ptr = old->forward_to_atomic(obj); + const oop obj = oop(obj_ptr); + const oop forward_ptr = old->forward_to_atomic(obj); if (forward_ptr == NULL) { Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz); - // alloc_purpose is just a hint to allocate() above, recheck the type of region - // we actually allocated from and update alloc_purpose accordingly - HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr); - alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured; - - if (g1p->track_object_age(alloc_purpose)) { - // We could simply do obj->incr_age(). However, this causes a - // performance issue. obj->incr_age() will first check whether - // the object has a displaced mark by checking its mark word; - // getting the mark word from the new location of the object - // stalls. So, given that we already have the mark word and we - // are about to install it anyway, it's better to increase the - // age on the mark word, when the object does not have a - // displaced mark word. We're not expecting many objects to have - // a displaced marked word, so that case is not optimized - // further (it could be...) and we simply call obj->incr_age(). - - if (m->has_displaced_mark_helper()) { - // in this case, we have to install the mark word first, + if (dest_state.is_young()) { + if (age < markOopDesc::max_age) { + age++; + } + if (old_mark->has_displaced_mark_helper()) { + // In this case, we have to install the mark word first, // otherwise obj looks to be forwarded (the old mark word, // which contains the forward pointer, was copied) - obj->set_mark(m); - obj->incr_age(); + obj->set_mark(old_mark); + markOop new_mark = old_mark->displaced_mark_helper()->set_age(age); + old_mark->set_displaced_mark_helper(new_mark); } else { - m = m->incr_age(); - obj->set_mark(m); + obj->set_mark(old_mark->set_age(age)); } - age_table()->add(obj, word_sz); + age_table()->add(age, word_sz); } else { - obj->set_mark(m); + obj->set_mark(old_mark); } if (G1StringDedup::is_enabled()) { - G1StringDedup::enqueue_from_evacuation(from_region->is_young(), - to_region->is_young(), + const bool is_from_young = state.is_young(); + const bool is_to_young = dest_state.is_young(); + assert(is_from_young == _g1h->heap_region_containing_raw(old)->is_young(), + "sanity"); + assert(is_to_young == _g1h->heap_region_containing_raw(obj)->is_young(), + "sanity"); + G1StringDedup::enqueue_from_evacuation(is_from_young, + is_to_young, queue_num(), obj); } - size_t* surv_young_words = surviving_young_words(); + size_t* const surv_young_words = surviving_young_words(); surv_young_words[young_index] += word_sz; if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) { @@ -240,14 +287,13 @@ oop G1ParScanThreadState::copy_to_survivor_space(oop const old) { oop* old_p = set_partial_array_mask(old); push_on_queue(old_p); } else { - // No point in using the slower heap_region_containing() method, - // given that we know obj is in the heap. - _scanner.set_region(_g1h->heap_region_containing_raw(obj)); + HeapRegion* const to_region = _g1h->heap_region_containing_raw(obj_ptr); + _scanner.set_region(to_region); obj->oop_iterate_backwards(&_scanner); } + return obj; } else { - _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context); - obj = forward_ptr; + _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context); + return forward_ptr; } - return obj; } diff --git a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp index c1262a688..d5350310e 100644 --- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp +++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp @@ -46,14 +46,16 @@ class G1ParScanThreadState : public StackObj { G1SATBCardTableModRefBS* _ct_bs; G1RemSet* _g1_rem; - G1ParGCAllocator* _g1_par_allocator; + G1ParGCAllocator* _g1_par_allocator; - ageTable _age_table; + ageTable _age_table; + InCSetState _dest[InCSetState::Num]; + // Local tenuring threshold. + uint _tenuring_threshold; + G1ParScanClosure _scanner; - G1ParScanClosure _scanner; - - size_t _alloc_buffer_waste; - size_t _undo_waste; + size_t _alloc_buffer_waste; + size_t _undo_waste; OopsInHeapRegionClosure* _evac_failure_cl; @@ -82,6 +84,14 @@ class G1ParScanThreadState : public StackObj { DirtyCardQueue& dirty_card_queue() { return _dcq; } G1SATBCardTableModRefBS* ctbs() { return _ct_bs; } + InCSetState dest(InCSetState original) const { + assert(original.is_valid(), + err_msg("Original state invalid: " CSETSTATE_FORMAT, original.value())); + assert(_dest[original.value()].is_valid_gen(), + err_msg("Dest state is invalid: " CSETSTATE_FORMAT, _dest[original.value()].value())); + return _dest[original.value()]; + } + public: G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp); ~G1ParScanThreadState(); @@ -112,7 +122,6 @@ class G1ParScanThreadState : public StackObj { } } } - public: void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) { _evac_failure_cl = evac_failure_cl; @@ -193,9 +202,20 @@ class G1ParScanThreadState : public StackObj { template <class T> inline void deal_with_reference(T* ref_to_scan); inline void dispatch_reference(StarTask ref); + + // Tries to allocate word_sz in the PLAB of the next "generation" after trying to + // allocate into dest. State is the original (source) cset state for the object + // that is allocated for. + // Returns a non-NULL pointer if successful, and updates dest if required. + HeapWord* allocate_in_next_plab(InCSetState const state, + InCSetState* dest, + size_t word_sz, + AllocationContext_t const context); + + inline InCSetState next_state(InCSetState const state, markOop const m, uint& age); public: - oop copy_to_survivor_space(oop const obj); + oop copy_to_survivor_space(InCSetState const state, oop const obj, markOop const old_mark); void trim_queue(); diff --git a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp index 3fb1829f2..1b03f8caa 100644 --- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp +++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp @@ -38,20 +38,21 @@ template <class T> void G1ParScanThreadState::do_oop_evac(T* p, HeapRegion* from // set, due to (benign) races in the claim mechanism during RSet scanning more // than one thread might claim the same card. So the same card may be // processed multiple times. So redo this check. - G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj); - if (in_cset_state == G1CollectedHeap::InCSet) { + const InCSetState in_cset_state = _g1h->in_cset_state(obj); + if (in_cset_state.is_in_cset()) { oop forwardee; - if (obj->is_forwarded()) { - forwardee = obj->forwardee(); + markOop m = obj->mark(); + if (m->is_marked()) { + forwardee = (oop) m->decode_pointer(); } else { - forwardee = copy_to_survivor_space(obj); + forwardee = copy_to_survivor_space(in_cset_state, obj, m); } oopDesc::encode_store_heap_oop(p, forwardee); - } else if (in_cset_state == G1CollectedHeap::IsHumongous) { + } else if (in_cset_state.is_humongous()) { _g1h->set_humongous_is_live(obj); } else { - assert(in_cset_state == G1CollectedHeap::InNeither, - err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state)); + assert(!in_cset_state.is_in_cset_or_humongous(), + err_msg("In_cset_state must be NotInCSet here, but is " CSETSTATE_FORMAT, in_cset_state.value())); } assert(obj != NULL, "Must be"); diff --git a/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp b/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp index 68d967c76..0c26b783e 100644 --- a/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp +++ b/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,17 +31,16 @@ #include "utilities/bitMap.inline.hpp" G1RegionToSpaceMapper::G1RegionToSpaceMapper(ReservedSpace rs, - size_t commit_granularity, + size_t used_size, + size_t page_size, size_t region_granularity, MemoryType type) : - _storage(), - _commit_granularity(commit_granularity), + _storage(rs, used_size, page_size), _region_granularity(region_granularity), _listener(NULL), _commit_map() { - guarantee(is_power_of_2(commit_granularity), "must be"); + guarantee(is_power_of_2(page_size), "must be"); guarantee(is_power_of_2(region_granularity), "must be"); - _storage.initialize_with_granularity(rs, commit_granularity); MemTracker::record_virtual_memory_type((address)rs.base(), type); } @@ -55,25 +54,26 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper { public: G1RegionsLargerThanCommitSizeMapper(ReservedSpace rs, - size_t os_commit_granularity, + size_t actual_size, + size_t page_size, size_t alloc_granularity, size_t commit_factor, MemoryType type) : - G1RegionToSpaceMapper(rs, os_commit_granularity, alloc_granularity, type), - _pages_per_region(alloc_granularity / (os_commit_granularity * commit_factor)) { + G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, type), + _pages_per_region(alloc_granularity / (page_size * commit_factor)) { - guarantee(alloc_granularity >= os_commit_granularity, "allocation granularity smaller than commit granularity"); + guarantee(alloc_granularity >= page_size, "allocation granularity smaller than commit granularity"); _commit_map.resize(rs.size() * commit_factor / alloc_granularity, /* in_resource_area */ false); } - virtual void commit_regions(uintptr_t start_idx, size_t num_regions) { - bool zero_filled = _storage.commit(start_idx * _pages_per_region, num_regions * _pages_per_region); + virtual void commit_regions(uint start_idx, size_t num_regions) { + bool zero_filled = _storage.commit((size_t)start_idx * _pages_per_region, num_regions * _pages_per_region); _commit_map.set_range(start_idx, start_idx + num_regions); fire_on_commit(start_idx, num_regions, zero_filled); } - virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions) { - _storage.uncommit(start_idx * _pages_per_region, num_regions * _pages_per_region); + virtual void uncommit_regions(uint start_idx, size_t num_regions) { + _storage.uncommit((size_t)start_idx * _pages_per_region, num_regions * _pages_per_region); _commit_map.clear_range(start_idx, start_idx + num_regions); } }; @@ -98,22 +98,23 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper { public: G1RegionsSmallerThanCommitSizeMapper(ReservedSpace rs, - size_t os_commit_granularity, + size_t actual_size, + size_t page_size, size_t alloc_granularity, size_t commit_factor, MemoryType type) : - G1RegionToSpaceMapper(rs, os_commit_granularity, alloc_granularity, type), - _regions_per_page((os_commit_granularity * commit_factor) / alloc_granularity), _refcounts() { + G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, type), + _regions_per_page((page_size * commit_factor) / alloc_granularity), _refcounts() { - guarantee((os_commit_granularity * commit_factor) >= alloc_granularity, "allocation granularity smaller than commit granularity"); - _refcounts.initialize((HeapWord*)rs.base(), (HeapWord*)(rs.base() + rs.size()), os_commit_granularity); + guarantee((page_size * commit_factor) >= alloc_granularity, "allocation granularity smaller than commit granularity"); + _refcounts.initialize((HeapWord*)rs.base(), (HeapWord*)(rs.base() + align_size_up(rs.size(), page_size)), page_size); _commit_map.resize(rs.size() * commit_factor / alloc_granularity, /* in_resource_area */ false); } - virtual void commit_regions(uintptr_t start_idx, size_t num_regions) { - for (uintptr_t i = start_idx; i < start_idx + num_regions; i++) { - assert(!_commit_map.at(i), err_msg("Trying to commit storage at region "INTPTR_FORMAT" that is already committed", i)); - uintptr_t idx = region_idx_to_page_idx(i); + virtual void commit_regions(uint start_idx, size_t num_regions) { + for (uint i = start_idx; i < start_idx + num_regions; i++) { + assert(!_commit_map.at(i), err_msg("Trying to commit storage at region %u that is already committed", i)); + size_t idx = region_idx_to_page_idx(i); uint old_refcount = _refcounts.get_by_index(idx); bool zero_filled = false; if (old_refcount == 0) { @@ -125,10 +126,10 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper { } } - virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions) { - for (uintptr_t i = start_idx; i < start_idx + num_regions; i++) { - assert(_commit_map.at(i), err_msg("Trying to uncommit storage at region "INTPTR_FORMAT" that is not committed", i)); - uintptr_t idx = region_idx_to_page_idx(i); + virtual void uncommit_regions(uint start_idx, size_t num_regions) { + for (uint i = start_idx; i < start_idx + num_regions; i++) { + assert(_commit_map.at(i), err_msg("Trying to uncommit storage at region %u that is not committed", i)); + size_t idx = region_idx_to_page_idx(i); uint old_refcount = _refcounts.get_by_index(idx); assert(old_refcount > 0, "must be"); if (old_refcount == 1) { @@ -147,14 +148,15 @@ void G1RegionToSpaceMapper::fire_on_commit(uint start_idx, size_t num_regions, b } G1RegionToSpaceMapper* G1RegionToSpaceMapper::create_mapper(ReservedSpace rs, - size_t os_commit_granularity, + size_t actual_size, + size_t page_size, size_t region_granularity, size_t commit_factor, MemoryType type) { - if (region_granularity >= (os_commit_granularity * commit_factor)) { - return new G1RegionsLargerThanCommitSizeMapper(rs, os_commit_granularity, region_granularity, commit_factor, type); + if (region_granularity >= (page_size * commit_factor)) { + return new G1RegionsLargerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type); } else { - return new G1RegionsSmallerThanCommitSizeMapper(rs, os_commit_granularity, region_granularity, commit_factor, type); + return new G1RegionsSmallerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type); } } diff --git a/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp b/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp index 6b3420649..6623a37f9 100644 --- a/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp +++ b/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,17 +46,20 @@ class G1RegionToSpaceMapper : public CHeapObj<mtGC> { protected: // Backing storage. G1PageBasedVirtualSpace _storage; - size_t _commit_granularity; + size_t _region_granularity; // Mapping management BitMap _commit_map; - G1RegionToSpaceMapper(ReservedSpace rs, size_t commit_granularity, size_t region_granularity, MemoryType type); + G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, MemoryType type); void fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled); public: MemRegion reserved() { return _storage.reserved(); } + size_t reserved_size() { return _storage.reserved_size(); } + size_t committed_size() { return _storage.committed_size(); } + void set_mapping_changed_listener(G1MappingChangedListener* listener) { _listener = listener; } virtual ~G1RegionToSpaceMapper() { @@ -67,16 +70,20 @@ class G1RegionToSpaceMapper : public CHeapObj<mtGC> { return _commit_map.at(idx); } - virtual void commit_regions(uintptr_t start_idx, size_t num_regions = 1) = 0; - virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions = 1) = 0; + virtual void commit_regions(uint start_idx, size_t num_regions = 1) = 0; + virtual void uncommit_regions(uint start_idx, size_t num_regions = 1) = 0; // Creates an appropriate G1RegionToSpaceMapper for the given parameters. + // The actual space to be used within the given reservation is given by actual_size. + // This is because some OSes need to round up the reservation size to guarantee + // alignment of page_size. // The byte_translation_factor defines how many bytes in a region correspond to // a single byte in the data structure this mapper is for. // Eg. in the card table, this value corresponds to the size a single card - // table entry corresponds to. + // table entry corresponds to in the heap. static G1RegionToSpaceMapper* create_mapper(ReservedSpace rs, - size_t os_commit_granularity, + size_t actual_size, + size_t page_size, size_t region_granularity, size_t byte_translation_factor, MemoryType type); diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp index b28fd2376..9d5bc57fb 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -78,9 +78,8 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) _cards_scanned(NULL), _total_cards_scanned(0), _prev_period_summary() { - _seq_task = new SubTasksDone(NumSeqTasks); guarantee(n_workers() > 0, "There should be some workers"); - _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers(), mtGC); + _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC); for (uint i = 0; i < n_workers(); i++) { _cset_rs_update_cl[i] = NULL; } @@ -90,11 +89,10 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) } G1RemSet::~G1RemSet() { - delete _seq_task; for (uint i = 0; i < n_workers(); i++) { assert(_cset_rs_update_cl[i] == NULL, "it should be"); } - FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl, mtGC); + FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl, mtGC); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -108,7 +106,7 @@ class ScanRSClosure : public HeapRegionClosure { size_t _cards_done, _cards; G1CollectedHeap* _g1h; - OopsInHeapRegionClosure* _oc; + G1ParPushHeapRSClosure* _oc; CodeBlobClosure* _code_root_cl; G1BlockOffsetSharedArray* _bot_shared; @@ -120,7 +118,7 @@ class ScanRSClosure : public HeapRegionClosure { bool _try_claimed; public: - ScanRSClosure(OopsInHeapRegionClosure* oc, + ScanRSClosure(G1ParPushHeapRSClosure* oc, CodeBlobClosure* code_root_cl, uint worker_i) : _oc(oc), @@ -142,16 +140,13 @@ public: void scanCard(size_t index, HeapRegion *r) { // Stack allocate the DirtyCardToOopClosure instance HeapRegionDCTOC cl(_g1h, r, _oc, - CardTableModRefBS::Precise, - HeapRegionDCTOC::IntoCSFilterKind); + CardTableModRefBS::Precise); // Set the "from" region in the closure. _oc->set_region(r); - HeapWord* card_start = _bot_shared->address_for_index(index); - HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words; - Space *sp = SharedHeap::heap()->space_containing(card_start); - MemRegion sm_region = sp->used_region_at_save_marks(); - MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end)); + MemRegion card_region(_bot_shared->address_for_index(index), G1BlockOffsetSharedArray::N_words); + MemRegion pre_gc_allocated(r->bottom(), r->scan_top()); + MemRegion mr = pre_gc_allocated.intersection(card_region); if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) { // We make the card as "claimed" lazily (so races are possible // but they're benign), which reduces the number of duplicate @@ -240,7 +235,7 @@ public: size_t cards_looked_up() { return _cards;} }; -void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, +void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc, CodeBlobClosure* code_root_cl, uint worker_i) { double rs_time_start = os::elapsedTime(); @@ -258,9 +253,8 @@ void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, assert(_cards_scanned != NULL, "invariant"); _cards_scanned[worker_i] = scanRScl.cards_done(); - _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); - _g1p->phase_times()->record_strong_code_root_scan_time(worker_i, - scanRScl.strong_code_root_scan_time_sec() * 1000.0); + _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec); + _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec()); } // Closure used for updating RSets and recording references that @@ -297,29 +291,18 @@ public: }; void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) { - double start = os::elapsedTime(); + G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i); // Apply the given closure to all remaining log entries. RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq); _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i); - - // Now there should be no dirty cards. - if (G1RSLogCheckCardTable) { - CountNonCleanMemRegionClosure cl(_g1); - _ct_bs->mod_card_iterate(&cl); - // XXX This isn't true any more: keeping cards of young regions - // marked dirty broke it. Need some reasonable fix. - guarantee(cl.n() == 0, "Card table should be clean."); - } - - _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); } void G1RemSet::cleanupHRRS() { HeapRegionRemSet::cleanup(); } -void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, +void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc, CodeBlobClosure* code_root_cl, uint worker_i) { #if CARD_REPEAT_HISTO @@ -344,23 +327,8 @@ void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, assert((ParallelGCThreads > 0) || worker_i == 0, "invariant"); - // The two flags below were introduced temporarily to serialize - // the updating and scanning of remembered sets. There are some - // race conditions when these two operations are done in parallel - // and they are causing failures. When we resolve said race - // conditions, we'll revert back to parallel remembered set - // updating and scanning. See CRs 6677707 and 6677708. - if (G1UseParallelRSetUpdating || (worker_i == 0)) { - updateRS(&into_cset_dcq, worker_i); - } else { - _g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0); - _g1p->phase_times()->record_update_rs_time(worker_i, 0.0); - } - if (G1UseParallelRSetScanning || (worker_i == 0)) { - scanRS(oc, code_root_cl, worker_i); - } else { - _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0); - } + updateRS(&into_cset_dcq, worker_i); + scanRS(oc, code_root_cl, worker_i); // We now clear the cached values of _cset_rs_update_cl for this worker _cset_rs_update_cl[worker_i] = NULL; @@ -461,7 +429,7 @@ G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) : G1UpdateRSOrPushRefOopClosure:: G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, G1RemSet* rs, - OopsInHeapRegionClosure* push_ref_cl, + G1ParPushHeapRSClosure* push_ref_cl, bool record_refs_into_cset, uint worker_i) : _g1(g1h), _g1_rem_set(rs), _from(NULL), @@ -562,7 +530,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, ct_freq_note_card(_ct_bs->index_for(start)); #endif - OopsInHeapRegionClosure* oops_in_heap_closure = NULL; + G1ParPushHeapRSClosure* oops_in_heap_closure = NULL; if (check_for_refs_into_cset) { // ConcurrentG1RefineThreads have worker numbers larger than what // _cset_rs_update_cl[] is set up to handle. But those threads should diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.hpp index 5a629fad2..9839e86c5 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -33,6 +33,7 @@ class G1CollectedHeap; class CardTableModRefBarrierSet; class ConcurrentG1Refine; +class G1ParPushHeapRSClosure; // A G1RemSet in which each heap region has a rem set that records the // external heap references into it. Uses a mod ref bs to track updates, @@ -58,7 +59,6 @@ protected: }; CardTableModRefBS* _ct_bs; - SubTasksDone* _seq_task; G1CollectorPolicy* _g1p; ConcurrentG1Refine* _cg1r; @@ -68,7 +68,7 @@ protected: // Used for caching the closure that is responsible for scanning // references into the collection set. - OopsInHeapRegionClosure** _cset_rs_update_cl; + G1ParPushHeapRSClosure** _cset_rs_update_cl; // Print the given summary info virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL); @@ -95,7 +95,7 @@ public: // partitioning the work to be done. It should be the same as // the "i" passed to the calling thread's work(i) function. // In the sequential case this param will be ignored. - void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, + void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk, CodeBlobClosure* code_root_cl, uint worker_i); @@ -107,7 +107,7 @@ public: void prepare_for_oops_into_collection_set_do(); void cleanup_after_oops_into_collection_set_do(); - void scanRS(OopsInHeapRegionClosure* oc, + void scanRS(G1ParPushHeapRSClosure* oc, CodeBlobClosure* code_root_cl, uint worker_i); diff --git a/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp b/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp new file mode 100644 index 000000000..b27696d2d --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include "classfile/symbolTable.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/codeCache.hpp" +#include "gc_implementation/g1/bufferingOopClosure.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1GCPhaseTimes.hpp" +#include "gc_implementation/g1/g1RemSet.inline.hpp" +#include "gc_implementation/g1/g1RootProcessor.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/fprofiler.hpp" +#include "runtime/mutex.hpp" +#include "services/management.hpp" + +class G1CodeBlobClosure : public CodeBlobClosure { + class HeapRegionGatheringOopClosure : public OopClosure { + G1CollectedHeap* _g1h; + OopClosure* _work; + nmethod* _nm; + + template <typename T> + void do_oop_work(T* p) { + _work->do_oop(p); + T oop_or_narrowoop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(oop_or_narrowoop)) { + oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop); + HeapRegion* hr = _g1h->heap_region_containing_raw(o); + assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset"); + hr->add_strong_code_root(_nm); + } + } + + public: + HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {} + + void do_oop(oop* o) { + do_oop_work(o); + } + + void do_oop(narrowOop* o) { + do_oop_work(o); + } + + void set_nm(nmethod* nm) { + _nm = nm; + } + }; + + HeapRegionGatheringOopClosure _oc; +public: + G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {} + + void do_code_blob(CodeBlob* cb) { + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + if (!nm->test_set_oops_do_mark()) { + _oc.set_nm(nm); + nm->oops_do(&_oc); + nm->fix_oop_relocations(); + } + } + } +}; + + +void G1RootProcessor::worker_has_discovered_all_strong_classes() { + uint n_workers = _g1h->n_par_threads(); + assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading"); + + if (n_workers > 0) { + uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes); + if (new_value == n_workers) { + // This thread is last. Notify the others. + MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag); + _lock.notify_all(); + } + } +} + +void G1RootProcessor::wait_until_all_strong_classes_discovered() { + uint n_workers = _g1h->n_par_threads(); + assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading"); + + if (n_workers > 0 && (uint)_n_workers_discovered_strong_classes != n_workers) { + MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag); + while ((uint)_n_workers_discovered_strong_classes != n_workers) { + _lock.wait(Mutex::_no_safepoint_check_flag, 0, false); + } + } +} + +G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) : + _g1h(g1h), + _process_strong_tasks(G1RP_PS_NumElements), + _srs(g1h), + _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false), + _n_workers_discovered_strong_classes(0) {} + +void G1RootProcessor::evacuate_roots(OopClosure* scan_non_heap_roots, + OopClosure* scan_non_heap_weak_roots, + CLDClosure* scan_strong_clds, + CLDClosure* scan_weak_clds, + bool trace_metadata, + uint worker_i) { + // First scan the shared roots. + double ext_roots_start = os::elapsedTime(); + G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times(); + + BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); + BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots); + + OopClosure* const weak_roots = &buf_scan_non_heap_weak_roots; + OopClosure* const strong_roots = &buf_scan_non_heap_roots; + + // CodeBlobClosures are not interoperable with BufferingOopClosures + G1CodeBlobClosure root_code_blobs(scan_non_heap_roots); + + process_java_roots(strong_roots, + trace_metadata ? scan_strong_clds : NULL, + scan_strong_clds, + trace_metadata ? NULL : scan_weak_clds, + &root_code_blobs, + phase_times, + worker_i); + + // This is the point where this worker thread will not find more strong CLDs/nmethods. + // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing. + if (trace_metadata) { + worker_has_discovered_all_strong_classes(); + } + + process_vm_roots(strong_roots, weak_roots, phase_times, worker_i); + + { + // Now the CM ref_processor roots. + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CMRefRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_refProcessor_oops_do)) { + // We need to treat the discovered reference lists of the + // concurrent mark ref processor as roots and keep entries + // (which are added by the marking threads) on them live + // until they can be processed at the end of marking. + _g1h->ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots); + } + } + + if (trace_metadata) { + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WaitForStrongCLD, worker_i); + // Barrier to make sure all workers passed + // the strong CLD and strong nmethods phases. + wait_until_all_strong_classes_discovered(); + } + + // Now take the complement of the strong CLDs. + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WeakCLDRoots, worker_i); + ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds); + } else { + phase_times->record_time_secs(G1GCPhaseTimes::WaitForStrongCLD, worker_i, 0.0); + phase_times->record_time_secs(G1GCPhaseTimes::WeakCLDRoots, worker_i, 0.0); + } + + // Finish up any enqueued closure apps (attributed as object copy time). + buf_scan_non_heap_roots.done(); + buf_scan_non_heap_weak_roots.done(); + + double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds() + + buf_scan_non_heap_weak_roots.closure_app_seconds(); + + phase_times->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec); + + double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec; + + phase_times->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec); + + // During conc marking we have to filter the per-thread SATB buffers + // to make sure we remove any oops into the CSet (which will show up + // as implicitly live). + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SATBFiltering, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->mark_in_progress()) { + JavaThread::satb_mark_queue_set().filter_thread_buffers(); + } + } + + _process_strong_tasks.all_tasks_completed(); +} + +void G1RootProcessor::process_strong_roots(OopClosure* oops, + CLDClosure* clds, + CodeBlobClosure* blobs) { + + process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0); + process_vm_roots(oops, NULL, NULL, 0); + + _process_strong_tasks.all_tasks_completed(); +} + +void G1RootProcessor::process_all_roots(OopClosure* oops, + CLDClosure* clds, + CodeBlobClosure* blobs) { + + process_java_roots(oops, NULL, clds, clds, NULL, NULL, 0); + process_vm_roots(oops, oops, NULL, 0); + + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_CodeCache_oops_do)) { + CodeCache::blobs_do(blobs); + } + + _process_strong_tasks.all_tasks_completed(); +} + +void G1RootProcessor::process_java_roots(OopClosure* strong_roots, + CLDClosure* thread_stack_clds, + CLDClosure* strong_clds, + CLDClosure* weak_clds, + CodeBlobClosure* strong_code, + G1GCPhaseTimes* phase_times, + uint worker_i) { + assert(thread_stack_clds == NULL || weak_clds == NULL, "There is overlap between those, only one may be set"); + // Iterating over the CLDG and the Threads are done early to allow us to + // first process the strong CLDs and nmethods and then, after a barrier, + // let the thread process the weak CLDs and nmethods. + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CLDGRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) { + ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i); + Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code); + } +} + +void G1RootProcessor::process_vm_roots(OopClosure* strong_roots, + OopClosure* weak_roots, + G1GCPhaseTimes* phase_times, + uint worker_i) { + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::UniverseRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_Universe_oops_do)) { + Universe::oops_do(strong_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JNIRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_JNIHandles_oops_do)) { + JNIHandles::oops_do(strong_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ObjectSynchronizerRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) { + ObjectSynchronizer::oops_do(strong_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::FlatProfilerRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) { + FlatProfiler::oops_do(strong_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ManagementRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_Management_oops_do)) { + Management::oops_do(strong_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JVMTIRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_jvmti_oops_do)) { + JvmtiExport::oops_do(strong_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SystemDictionaryRoots, worker_i); + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) { + SystemDictionary::roots_oops_do(strong_roots, weak_roots); + } + } + + { + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::StringTableRoots, worker_i); + // All threads execute the following. A specific chunk of buckets + // from the StringTable are the individual tasks. + if (weak_roots != NULL) { + StringTable::possibly_parallel_oops_do(weak_roots); + } + } +} + +void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, + OopClosure* scan_non_heap_weak_roots, + uint worker_i) { + G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times(); + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CodeCacheRoots, worker_i); + + // Now scan the complement of the collection set. + G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots); + + _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i); +} + +void G1RootProcessor::set_num_workers(int active_workers) { + _process_strong_tasks.set_n_threads(active_workers); +} diff --git a/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp b/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp new file mode 100644 index 000000000..1cce9f357 --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP + +#include "memory/allocation.hpp" +#include "memory/sharedHeap.hpp" +#include "runtime/mutex.hpp" + +class CLDClosure; +class CodeBlobClosure; +class G1CollectedHeap; +class G1GCPhaseTimes; +class G1ParPushHeapRSClosure; +class Monitor; +class OopClosure; +class SubTasksDone; + +// Scoped object to assist in applying oop, CLD and code blob closures to +// root locations. Handles claiming of different root scanning tasks +// and takes care of global state for root scanning via a StrongRootsScope. +// In the parallel case there is a shared G1RootProcessor object where all +// worker thread call the process_roots methods. +class G1RootProcessor : public StackObj { + G1CollectedHeap* _g1h; + SubTasksDone _process_strong_tasks; + SharedHeap::StrongRootsScope _srs; + + // Used to implement the Thread work barrier. + Monitor _lock; + volatile jint _n_workers_discovered_strong_classes; + + enum G1H_process_roots_tasks { + G1RP_PS_Universe_oops_do, + G1RP_PS_JNIHandles_oops_do, + G1RP_PS_ObjectSynchronizer_oops_do, + G1RP_PS_FlatProfiler_oops_do, + G1RP_PS_Management_oops_do, + G1RP_PS_SystemDictionary_oops_do, + G1RP_PS_ClassLoaderDataGraph_oops_do, + G1RP_PS_jvmti_oops_do, + G1RP_PS_CodeCache_oops_do, + G1RP_PS_filter_satb_buffers, + G1RP_PS_refProcessor_oops_do, + // Leave this one last. + G1RP_PS_NumElements + }; + + void worker_has_discovered_all_strong_classes(); + void wait_until_all_strong_classes_discovered(); + + void process_java_roots(OopClosure* scan_non_heap_roots, + CLDClosure* thread_stack_clds, + CLDClosure* scan_strong_clds, + CLDClosure* scan_weak_clds, + CodeBlobClosure* scan_strong_code, + G1GCPhaseTimes* phase_times, + uint worker_i); + + void process_vm_roots(OopClosure* scan_non_heap_roots, + OopClosure* scan_non_heap_weak_roots, + G1GCPhaseTimes* phase_times, + uint worker_i); + +public: + G1RootProcessor(G1CollectedHeap* g1h); + + // Apply closures to the strongly and weakly reachable roots in the system + // in a single pass. + // Record and report timing measurements for sub phases using the worker_i + void evacuate_roots(OopClosure* scan_non_heap_roots, + OopClosure* scan_non_heap_weak_roots, + CLDClosure* scan_strong_clds, + CLDClosure* scan_weak_clds, + bool trace_metadata, + uint worker_i); + + // Apply oops, clds and blobs to all strongly reachable roots in the system + void process_strong_roots(OopClosure* oops, + CLDClosure* clds, + CodeBlobClosure* blobs); + + // Apply oops, clds and blobs to strongly and weakly reachable roots in the system + void process_all_roots(OopClosure* oops, + CLDClosure* clds, + CodeBlobClosure* blobs); + + // Apply scan_rs to all locations in the union of the remembered sets for all + // regions in the collection set + // (having done "set_region" to indicate the region in which the root resides), + void scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, + OopClosure* scan_non_heap_weak_roots, + uint worker_i); + + // Inform the root processor about the number of worker threads + void set_num_workers(int active_workers); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp index d353d7ebd..bb960ee3a 100644 --- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp @@ -105,7 +105,7 @@ void G1StringDedup::deduplicate(oop java_string) { void G1StringDedup::oops_do(OopClosure* keep_alive) { assert(is_enabled(), "String deduplication not enabled"); - unlink_or_oops_do(NULL, keep_alive); + unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */); } void G1StringDedup::unlink(BoolObjectClosure* is_alive) { @@ -122,37 +122,35 @@ void G1StringDedup::unlink(BoolObjectClosure* is_alive) { class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask { private: G1StringDedupUnlinkOrOopsDoClosure _cl; + G1GCPhaseTimes* _phase_times; public: G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive, OopClosure* keep_alive, - bool allow_resize_and_rehash) : + bool allow_resize_and_rehash, + G1GCPhaseTimes* phase_times) : AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"), - _cl(is_alive, keep_alive, allow_resize_and_rehash) { - } + _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { } virtual void work(uint worker_id) { - double queue_fixup_start = os::elapsedTime(); - G1StringDedupQueue::unlink_or_oops_do(&_cl); - - double table_fixup_start = os::elapsedTime(); - G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id); - - double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0; - double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0; - G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); - g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms); - g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms); + { + G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id); + G1StringDedupQueue::unlink_or_oops_do(&_cl); + } + { + G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id); + G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id); + } } }; -void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) { +void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, + OopClosure* keep_alive, + bool allow_resize_and_rehash, + G1GCPhaseTimes* phase_times) { assert(is_enabled(), "String deduplication not enabled"); - G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); - g1p->phase_times()->note_string_dedup_fixup_start(); - double fixup_start = os::elapsedTime(); - G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash); + G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times); if (G1CollectedHeap::use_parallel_gc_threads()) { G1CollectedHeap* g1h = G1CollectedHeap::heap(); g1h->set_par_threads(); @@ -161,10 +159,6 @@ void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* k } else { task.work(0); } - - double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0; - g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms); - g1p->phase_times()->note_string_dedup_fixup_end(); } void G1StringDedup::threads_do(ThreadClosure* tc) { diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp index 68f700f65..3792a667a 100644 --- a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp @@ -90,6 +90,7 @@ class BoolObjectClosure; class ThreadClosure; class outputStream; class G1StringDedupTable; +class G1GCPhaseTimes; // // Main interface for interacting with string deduplication. @@ -130,7 +131,7 @@ public: static void oops_do(OopClosure* keep_alive); static void unlink(BoolObjectClosure* is_alive); static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, - bool allow_resize_and_rehash = true); + bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL); static void threads_do(ThreadClosure* tc); static void print_worker_threads_on(outputStream* st); diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp index 4478e93f8..e24cc9594 100644 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -217,14 +217,6 @@ product(uintx, G1HeapRegionSize, 0, \ "Size of the G1 regions.") \ \ - experimental(bool, G1UseParallelRSetUpdating, true, \ - "Enables the parallelization of remembered set updating " \ - "during evacuation pauses") \ - \ - experimental(bool, G1UseParallelRSetScanning, true, \ - "Enables the parallelization of remembered set scanning " \ - "during evacuation pauses") \ - \ product(uintx, G1ConcRefinementThreads, 0, \ "If non-0 is the number of parallel rem set update threads, " \ "otherwise the value is determined ergonomically.") \ @@ -282,10 +274,14 @@ product(uintx, G1MixedGCCountTarget, 8, \ "The target number of mixed GCs after a marking cycle.") \ \ - experimental(bool, G1ReclaimDeadHumongousObjectsAtYoungGC, true, \ + experimental(bool, G1EagerReclaimHumongousObjects, true, \ "Try to reclaim dead large objects at every young GC.") \ \ - experimental(bool, G1TraceReclaimDeadHumongousObjectsAtYoungGC, false, \ + experimental(bool, G1EagerReclaimHumongousObjectsWithStaleRefs, true, \ + "Try to reclaim dead large objects that have a few stale " \ + "references at every young GC.") \ + \ + experimental(bool, G1TraceEagerReclaimHumongousObjects, false, \ "Print some information about large object liveness " \ "at every young GC.") \ \ diff --git a/src/share/vm/gc_implementation/g1/heapRegion.cpp b/src/share/vm/gc_implementation/g1/heapRegion.cpp index 3a7251b56..d5a4b2617 100644 --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,93 +47,55 @@ size_t HeapRegion::GrainWords = 0; size_t HeapRegion::CardsPerRegion = 0; HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1, - HeapRegion* hr, ExtendedOopClosure* cl, - CardTableModRefBS::PrecisionStyle precision, - FilterKind fk) : + HeapRegion* hr, + G1ParPushHeapRSClosure* cl, + CardTableModRefBS::PrecisionStyle precision) : DirtyCardToOopClosure(hr, cl, precision, NULL), - _hr(hr), _fk(fk), _g1(g1) { } + _hr(hr), _rs_scan(cl), _g1(g1) { } FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc) : _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { } -template<class ClosureType> -HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h, - HeapRegion* hr, - HeapWord* cur, HeapWord* top) { - oop cur_oop = oop(cur); - size_t oop_size = hr->block_size(cur); - HeapWord* next_obj = cur + oop_size; - while (next_obj < top) { - // Keep filtering the remembered set. - if (!g1h->is_obj_dead(cur_oop, hr)) { - // Bottom lies entirely below top, so we can call the - // non-memRegion version of oop_iterate below. - cur_oop->oop_iterate(cl); - } - cur = next_obj; - cur_oop = oop(cur); - oop_size = hr->block_size(cur); - next_obj = cur + oop_size; - } - return cur; -} - void HeapRegionDCTOC::walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) { G1CollectedHeap* g1h = _g1; size_t oop_size; - ExtendedOopClosure* cl2 = NULL; - - FilterIntoCSClosure intoCSFilt(this, g1h, _cl); - FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl); - - switch (_fk) { - case NoFilterKind: cl2 = _cl; break; - case IntoCSFilterKind: cl2 = &intoCSFilt; break; - case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break; - default: ShouldNotReachHere(); - } + HeapWord* cur = bottom; // Start filtering what we add to the remembered set. If the object is // not considered dead, either because it is marked (in the mark bitmap) // or it was allocated after marking finished, then we add it. Otherwise // we can safely ignore the object. - if (!g1h->is_obj_dead(oop(bottom), _hr)) { - oop_size = oop(bottom)->oop_iterate(cl2, mr); + if (!g1h->is_obj_dead(oop(cur), _hr)) { + oop_size = oop(cur)->oop_iterate(_rs_scan, mr); } else { - oop_size = _hr->block_size(bottom); + oop_size = _hr->block_size(cur); } - bottom += oop_size; - - if (bottom < top) { - // We replicate the loop below for several kinds of possible filters. - switch (_fk) { - case NoFilterKind: - bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top); - break; - - case IntoCSFilterKind: { - FilterIntoCSClosure filt(this, g1h, _cl); - bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); - break; - } - - case OutOfRegionFilterKind: { - FilterOutOfRegionClosure filt(_hr, _cl); - bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); - break; - } - - default: - ShouldNotReachHere(); + cur += oop_size; + + if (cur < top) { + oop cur_oop = oop(cur); + oop_size = _hr->block_size(cur); + HeapWord* next_obj = cur + oop_size; + while (next_obj < top) { + // Keep filtering the remembered set. + if (!g1h->is_obj_dead(cur_oop, _hr)) { + // Bottom lies entirely below top, so we can call the + // non-memRegion version of oop_iterate below. + cur_oop->oop_iterate(_rs_scan); + } + cur = next_obj; + cur_oop = oop(cur); + oop_size = _hr->block_size(cur); + next_obj = cur + oop_size; } // Last object. Need to do dead-obj filtering here too. - if (!g1h->is_obj_dead(oop(bottom), _hr)) { - oop(bottom)->oop_iterate(cl2, mr); + if (!g1h->is_obj_dead(oop(cur), _hr)) { + oop(cur)->oop_iterate(_rs_scan, mr); } } } @@ -338,7 +300,7 @@ void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) { _orig_end = mr.end(); hr_clear(false /*par*/, false /*clear_space*/); set_top(bottom()); - record_top_and_timestamp(); + record_timestamp(); } CompactibleSpace* HeapRegion::next_compaction_space() const { @@ -426,9 +388,9 @@ oops_on_card_seq_iterate_careful(MemRegion mr, // If we're within a stop-world GC, then we might look at a card in a // GC alloc region that extends onto a GC LAB, which may not be - // parseable. Stop such at the "saved_mark" of the region. + // parseable. Stop such at the "scan_top" of the region. if (g1h->is_gc_active()) { - mr = mr.intersection(used_region_at_save_marks()); + mr = mr.intersection(MemRegion(bottom(), scan_top())); } else { mr = mr.intersection(used_region()); } @@ -468,7 +430,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr, oop obj; HeapWord* next = cur; - while (next <= start) { + do { cur = next; obj = oop(cur); if (obj->klass_or_null() == NULL) { @@ -477,45 +439,38 @@ oops_on_card_seq_iterate_careful(MemRegion mr, } // Otherwise... next = cur + block_size(cur); - } + } while (next <= start); // If we finish the above loop...We have a parseable object that // begins on or before the start of the memory region, and ends // inside or spans the entire region. - - assert(obj == oop(cur), "sanity"); assert(cur <= start, "Loop postcondition"); assert(obj->klass_or_null() != NULL, "Loop postcondition"); - assert((cur + block_size(cur)) > start, "Loop postcondition"); - - if (!g1h->is_obj_dead(obj)) { - obj->oop_iterate(cl, mr); - } - while (cur < end) { + do { obj = oop(cur); + assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant"); if (obj->klass_or_null() == NULL) { // Ran into an unparseable point. return cur; - }; + } - // Otherwise: - next = cur + block_size(cur); + // Advance the current pointer. "obj" still points to the object to iterate. + cur = cur + block_size(cur); if (!g1h->is_obj_dead(obj)) { - if (next < end || !obj->is_objArray()) { - // This object either does not span the MemRegion - // boundary, or if it does it's not an array. - // Apply closure to whole object. + // Non-objArrays are sometimes marked imprecise at the object start. We + // always need to iterate over them in full. + // We only iterate over object arrays in full if they are completely contained + // in the memory region. + if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) { obj->oop_iterate(cl); } else { - // This obj is an array that spans the boundary. - // Stop at the boundary. obj->oop_iterate(cl, mr); } } - cur = next; - } + } while (cur < end); + return NULL; } @@ -980,7 +935,7 @@ void HeapRegion::verify() const { void G1OffsetTableContigSpace::clear(bool mangle_space) { set_top(bottom()); - set_saved_mark_word(bottom()); + _scan_top = bottom(); CompactibleSpace::clear(mangle_space); reset_bot(); } @@ -1012,41 +967,42 @@ HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start, return _offsets.threshold(); } -HeapWord* G1OffsetTableContigSpace::saved_mark_word() const { +HeapWord* G1OffsetTableContigSpace::scan_top() const { G1CollectedHeap* g1h = G1CollectedHeap::heap(); - assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" ); HeapWord* local_top = top(); OrderAccess::loadload(); - if (_gc_time_stamp < g1h->get_gc_time_stamp()) { + const unsigned local_time_stamp = _gc_time_stamp; + assert(local_time_stamp <= g1h->get_gc_time_stamp(), "invariant"); + if (local_time_stamp < g1h->get_gc_time_stamp()) { return local_top; } else { - return Space::saved_mark_word(); + return _scan_top; } } -void G1OffsetTableContigSpace::record_top_and_timestamp() { +void G1OffsetTableContigSpace::record_timestamp() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp(); if (_gc_time_stamp < curr_gc_time_stamp) { - // The order of these is important, as another thread might be - // about to start scanning this region. If it does so after - // set_saved_mark and before _gc_time_stamp = ..., then the latter - // will be false, and it will pick up top() as the high water mark - // of region. If it does so after _gc_time_stamp = ..., then it - // will pick up the right saved_mark_word() as the high water mark - // of the region. Either way, the behaviour will be correct. - Space::set_saved_mark_word(top()); - OrderAccess::storestore(); + // Setting the time stamp here tells concurrent readers to look at + // scan_top to know the maximum allowed address to look at. + + // scan_top should be bottom for all regions except for the + // retained old alloc region which should have scan_top == top + HeapWord* st = _scan_top; + guarantee(st == _bottom || st == _top, "invariant"); + _gc_time_stamp = curr_gc_time_stamp; - // No need to do another barrier to flush the writes above. If - // this is called in parallel with other threads trying to - // allocate into the region, the caller should call this while - // holding a lock and when the lock is released the writes will be - // flushed. } } +void G1OffsetTableContigSpace::record_retained_region() { + // scan_top is the maximum address where it's safe for the next gc to + // scan this region. + _scan_top = top(); +} + void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) { object_iterate(blk); } @@ -1080,6 +1036,8 @@ G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) { CompactibleSpace::initialize(mr, clear_space, mangle_space); _top = bottom(); + _scan_top = bottom(); + set_saved_mark_word(NULL); reset_bot(); } diff --git a/src/share/vm/gc_implementation/g1/heapRegion.hpp b/src/share/vm/gc_implementation/g1/heapRegion.hpp index 16c0d719e..80ba22313 100644 --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp @@ -67,17 +67,9 @@ class nmethod; // sets. class HeapRegionDCTOC : public DirtyCardToOopClosure { -public: - // Specification of possible DirtyCardToOopClosure filtering. - enum FilterKind { - NoFilterKind, - IntoCSFilterKind, - OutOfRegionFilterKind - }; - -protected: +private: HeapRegion* _hr; - FilterKind _fk; + G1ParPushHeapRSClosure* _rs_scan; G1CollectedHeap* _g1; // Walk the given memory region from bottom to (actual) top @@ -90,9 +82,9 @@ protected: public: HeapRegionDCTOC(G1CollectedHeap* g1, - HeapRegion* hr, ExtendedOopClosure* cl, - CardTableModRefBS::PrecisionStyle precision, - FilterKind fk); + HeapRegion* hr, + G1ParPushHeapRSClosure* cl, + CardTableModRefBS::PrecisionStyle precision); }; // The complicating factor is that BlockOffsetTable diverged @@ -101,28 +93,25 @@ public: // OffsetTableContigSpace. If the two versions of BlockOffsetTable could // be reconciled, then G1OffsetTableContigSpace could go away. -// The idea behind time stamps is the following. Doing a save_marks on -// all regions at every GC pause is time consuming (if I remember -// well, 10ms or so). So, we would like to do that only for regions -// that are GC alloc regions. To achieve this, we use time -// stamps. For every evacuation pause, G1CollectedHeap generates a -// unique time stamp (essentially a counter that gets -// incremented). Every time we want to call save_marks on a region, -// we set the saved_mark_word to top and also copy the current GC -// time stamp to the time stamp field of the space. Reading the -// saved_mark_word involves checking the time stamp of the -// region. If it is the same as the current GC time stamp, then we -// can safely read the saved_mark_word field, as it is valid. If the -// time stamp of the region is not the same as the current GC time -// stamp, then we instead read top, as the saved_mark_word field is -// invalid. Time stamps (on the regions and also on the -// G1CollectedHeap) are reset at every cleanup (we iterate over -// the regions anyway) and at the end of a Full GC. The current scheme -// that uses sequential unsigned ints will fail only if we have 4b +// The idea behind time stamps is the following. We want to keep track of +// the highest address where it's safe to scan objects for each region. +// This is only relevant for current GC alloc regions so we keep a time stamp +// per region to determine if the region has been allocated during the current +// GC or not. If the time stamp is current we report a scan_top value which +// was saved at the end of the previous GC for retained alloc regions and which is +// equal to the bottom for all other regions. +// There is a race between card scanners and allocating gc workers where we must ensure +// that card scanners do not read the memory allocated by the gc workers. +// In order to enforce that, we must not return a value of _top which is more recent than the +// time stamp. This is due to the fact that a region may become a gc alloc region at +// some point after we've read the timestamp value as being < the current time stamp. +// The time stamps are re-initialized to zero at cleanup and at Full GCs. +// The current scheme that uses sequential unsigned ints will fail only if we have 4b // evacuation pauses between two cleanups, which is _highly_ unlikely. class G1OffsetTableContigSpace: public CompactibleSpace { friend class VMStructs; HeapWord* _top; + HeapWord* volatile _scan_top; protected: G1BlockOffsetArrayContigSpace _offsets; Mutex _par_alloc_lock; @@ -166,10 +155,11 @@ class G1OffsetTableContigSpace: public CompactibleSpace { void set_bottom(HeapWord* value); void set_end(HeapWord* value); - virtual HeapWord* saved_mark_word() const; - void record_top_and_timestamp(); + HeapWord* scan_top() const; + void record_timestamp(); void reset_gc_time_stamp() { _gc_time_stamp = 0; } unsigned get_gc_time_stamp() { return _gc_time_stamp; } + void record_retained_region(); // See the comment above in the declaration of _pre_dummy_top for an // explanation of what it is. @@ -193,6 +183,8 @@ class G1OffsetTableContigSpace: public CompactibleSpace { virtual HeapWord* allocate(size_t word_size); HeapWord* par_allocate(size_t word_size); + HeapWord* saved_mark_word() const { ShouldNotReachHere(); return NULL; } + // MarkSweep support phase3 virtual HeapWord* initialize_threshold(); virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end); diff --git a/src/share/vm/gc_implementation/g1/heapRegionManager.cpp b/src/share/vm/gc_implementation/g1/heapRegionManager.cpp index f1ffc245d..020eb08df 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionManager.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegionManager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -145,6 +145,24 @@ void HeapRegionManager::make_regions_available(uint start, uint num_regions) { } } +MemoryUsage HeapRegionManager::get_auxiliary_data_memory_usage() const { + size_t used_sz = + _prev_bitmap_mapper->committed_size() + + _next_bitmap_mapper->committed_size() + + _bot_mapper->committed_size() + + _cardtable_mapper->committed_size() + + _card_counts_mapper->committed_size(); + + size_t committed_sz = + _prev_bitmap_mapper->reserved_size() + + _next_bitmap_mapper->reserved_size() + + _bot_mapper->reserved_size() + + _cardtable_mapper->reserved_size() + + _card_counts_mapper->reserved_size(); + + return MemoryUsage(0, used_sz, committed_sz, committed_sz); +} + uint HeapRegionManager::expand_by(uint num_regions) { return expand_at(0, num_regions); } diff --git a/src/share/vm/gc_implementation/g1/heapRegionManager.hpp b/src/share/vm/gc_implementation/g1/heapRegionManager.hpp index 666184e50..83996f71d 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionManager.hpp +++ b/src/share/vm/gc_implementation/g1/heapRegionManager.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ #include "gc_implementation/g1/g1BiasedArray.hpp" #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp" #include "gc_implementation/g1/heapRegionSet.hpp" +#include "services/memoryUsage.hpp" class HeapRegion; class HeapRegionClosure; @@ -197,6 +198,8 @@ public: // Return the maximum number of regions in the heap. uint max_length() const { return (uint)_regions.length(); } + MemoryUsage get_auxiliary_data_memory_usage() const; + MemRegion reserved() const { return MemRegion(heap_bottom(), heap_end()); } // Expand the sequence to reflect that the heap has grown. Either create new diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp index ec0249ea4..2574c91b0 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp @@ -694,6 +694,18 @@ void OtherRegionsTable::scrub(CardTableModRefBS* ctbs, clear_fcc(); } +bool OtherRegionsTable::occupancy_less_or_equal_than(size_t limit) const { + if (limit <= (size_t)G1RSetSparseRegionEntries) { + return occ_coarse() == 0 && _first_all_fine_prts == NULL && occ_sparse() <= limit; + } else { + // Current uses of this method may only use values less than G1RSetSparseRegionEntries + // for the limit. The solution, comparing against occupied() would be too slow + // at this time. + Unimplemented(); + return false; + } +} + bool OtherRegionsTable::is_empty() const { return occ_sparse() == 0 && occ_coarse() == 0 && _first_all_fine_prts == NULL; } diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp index 2d5c71b22..221651cf0 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -181,6 +181,10 @@ public: // sense. void add_reference(OopOrNarrowOopStar from, int tid); + // Returns whether this remembered set (and all sub-sets) have an occupancy + // that is less or equal than the given occupancy. + bool occupancy_less_or_equal_than(size_t limit) const; + // Removes any entries shown by the given bitmaps to contain only dead // objects. void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); @@ -276,6 +280,10 @@ public: return (strong_code_roots_list_length() == 0) && _other_regions.is_empty(); } + bool occupancy_less_or_equal_than(size_t occ) const { + return (strong_code_roots_list_length() == 0) && _other_regions.occupancy_less_or_equal_than(occ); + } + size_t occupied() { MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); return occupied_locked(); diff --git a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp index 8fde2455f..b6544308f 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp @@ -420,6 +420,7 @@ void FreeRegionList_test() { ReservedSpace bot_rs(G1BlockOffsetSharedArray::compute_size(heap.word_size())); G1RegionToSpaceMapper* bot_storage = G1RegionToSpaceMapper::create_mapper(bot_rs, + bot_rs.size(), os::vm_page_size(), HeapRegion::GrainBytes, G1BlockOffsetSharedArray::N_bytes, diff --git a/src/share/vm/gc_implementation/g1/satbQueue.cpp b/src/share/vm/gc_implementation/g1/satbQueue.cpp index fbd71a2dc..3544db550 100644 --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,36 +29,74 @@ #include "memory/sharedHeap.hpp" #include "oops/oop.inline.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" #include "runtime/thread.hpp" #include "runtime/vmThread.hpp" PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC void ObjPtrQueue::flush() { - // The buffer might contain refs into the CSet. We have to filter it - // first before we flush it, otherwise we might end up with an - // enqueued buffer with refs into the CSet which breaks our invariants. + // Filter now to possibly save work later. If filtering empties the + // buffer then flush_impl can deallocate the buffer. filter(); flush_impl(); } -// This method removes entries from an SATB buffer that will not be -// useful to the concurrent marking threads. An entry is removed if it -// satisfies one of the following conditions: +// Return true if a SATB buffer entry refers to an object that +// requires marking. // -// * it points to an object outside the G1 heap (G1's concurrent -// marking only visits objects inside the G1 heap), -// * it points to an object that has been allocated since marking -// started (according to SATB those objects do not need to be -// visited during marking), or -// * it points to an object that has already been marked (no need to -// process it again). +// The entry must point into the G1 heap. In particular, it must not +// be a NULL pointer. NULL pointers are pre-filtered and never +// inserted into a SATB buffer. // -// The rest of the entries will be retained and are compacted towards -// the top of the buffer. Note that, because we do not allow old -// regions in the CSet during marking, all objects on the CSet regions -// are young (eden or survivors) and therefore implicitly live. So any -// references into the CSet will be removed during filtering. +// An entry that is below the NTAMS pointer for the containing heap +// region requires marking. Such an entry must point to a valid object. +// +// An entry that is at least the NTAMS pointer for the containing heap +// region might be any of the following, none of which should be marked. +// +// * A reference to an object allocated since marking started. +// According to SATB, such objects are implicitly kept live and do +// not need to be dealt with via SATB buffer processing. +// +// * A reference to a young generation object. Young objects are +// handled separately and are not marked by concurrent marking. +// +// * A stale reference to a young generation object. If a young +// generation object reference is recorded and not filtered out +// before being moved by a young collection, the reference becomes +// stale. +// +// * A stale reference to an eagerly reclaimed humongous object. If a +// humongous object is recorded and then reclaimed, the reference +// becomes stale. +// +// The stale reference cases are implicitly handled by the NTAMS +// comparison. Because of the possibility of stale references, buffer +// processing must be somewhat circumspect and not assume entries +// in an unfiltered buffer refer to valid objects. + +inline bool requires_marking(const void* entry, G1CollectedHeap* heap) { + // Includes rejection of NULL pointers. + assert(heap->is_in_reserved(entry), + err_msg("Non-heap pointer in SATB buffer: " PTR_FORMAT, p2i(entry))); + + HeapRegion* region = heap->heap_region_containing_raw(entry); + assert(region != NULL, err_msg("No region for " PTR_FORMAT, p2i(entry))); + if (entry >= region->next_top_at_mark_start()) { + return false; + } + + assert(((oop)entry)->is_oop(true /* ignore mark word */), + err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(entry))); + + return true; +} + +// This method removes entries from a SATB buffer that will not be +// useful to the concurrent marking threads. Entries are retained if +// they require marking and are not already marked. Retained entries +// are compacted toward the top of the buffer. void ObjPtrQueue::filter() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -80,26 +118,25 @@ void ObjPtrQueue::filter() { assert(i > 0, "we should have at least one more entry to process"); i -= oopSize; debug_only(entries += 1;) - oop* p = (oop*) &buf[byte_index_to_index((int) i)]; - oop obj = *p; + void** p = &buf[byte_index_to_index((int) i)]; + void* entry = *p; // NULL the entry so that unused parts of the buffer contain NULLs // at the end. If we are going to retain it we will copy it to its // final place. If we have retained all entries we have visited so // far, we'll just end up copying it to the same place. *p = NULL; - bool retain = g1h->is_obj_ill(obj); - if (retain) { + if (requires_marking(entry, g1h) && !g1h->isMarkedNext((oop)entry)) { assert(new_index > 0, "we should not have already filled up the buffer"); new_index -= oopSize; assert(new_index >= i, "new_index should never be below i, as we alwaysr compact 'up'"); - oop* new_p = (oop*) &buf[byte_index_to_index((int) new_index)]; + void** new_p = &buf[byte_index_to_index((int) new_index)]; assert(new_p >= p, "the destination location should never be below " "the source as we always compact 'up'"); assert(*new_p == NULL, "we should have already cleared the destination location"); - *new_p = obj; + *new_p = entry; debug_only(retained += 1;) } } @@ -126,10 +163,7 @@ bool ObjPtrQueue::should_enqueue_buffer() { assert(_lock == NULL || _lock->owned_by_self(), "we should have taken the lock before calling this"); - // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to - // filter the buffer given that this will remove any references into - // the CSet as we currently assume that no such refs will appear in - // enqueued buffers. + // If G1SATBBufferEnqueueingThresholdPercent == 0 we could skip filtering. // This method should only be called if there is a non-NULL buffer // that is full. @@ -146,31 +180,19 @@ bool ObjPtrQueue::should_enqueue_buffer() { return should_enqueue; } -void ObjPtrQueue::apply_closure(ObjectClosure* cl) { +void ObjPtrQueue::apply_closure_and_empty(SATBBufferClosure* cl) { + assert(SafepointSynchronize::is_at_safepoint(), + "SATB queues must only be processed at safepoints"); if (_buf != NULL) { - apply_closure_to_buffer(cl, _buf, _index, _sz); - } -} - -void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) { - if (_buf != NULL) { - apply_closure_to_buffer(cl, _buf, _index, _sz); + assert(_index % sizeof(void*) == 0, "invariant"); + assert(_sz % sizeof(void*) == 0, "invariant"); + assert(_index <= _sz, "invariant"); + cl->do_buffer(_buf + byte_index_to_index((int)_index), + byte_index_to_index((int)(_sz - _index))); _index = _sz; } } -void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl, - void** buf, size_t index, size_t sz) { - if (cl == NULL) return; - for (size_t i = index; i < sz; i += oopSize) { - oop obj = (oop)buf[byte_index_to_index((int)i)]; - // There can be NULL entries because of destructors. - if (obj != NULL) { - cl->do_object(obj); - } - } -} - #ifndef PRODUCT // Helpful for debugging @@ -186,23 +208,12 @@ void ObjPtrQueue::print(const char* name, } #endif // PRODUCT -#ifdef ASSERT -void ObjPtrQueue::verify_oops_in_buffer() { - if (_buf == NULL) return; - for (size_t i = _index; i < _sz; i += oopSize) { - oop obj = (oop)_buf[byte_index_to_index((int)i)]; - assert(obj != NULL && obj->is_oop(true /* ignore mark word */), - "Not an oop"); - } -} -#endif - #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away #pragma warning( disable:4355 ) // 'this' : used in base member initializer list #endif // _MSC_VER SATBMarkQueueSet::SATBMarkQueueSet() : - PtrQueueSet(), _closure(NULL), _par_closures(NULL), + PtrQueueSet(), _shared_satb_queue(this, true /*perm*/) { } void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, @@ -210,13 +221,9 @@ void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, Mutex* lock) { PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1); _shared_satb_queue.set_lock(lock); - if (ParallelGCThreads > 0) { - _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads, mtGC); - } } void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) { - DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();) t->satb_mark_queue().handle_zero_index(); } @@ -276,17 +283,7 @@ void SATBMarkQueueSet::filter_thread_buffers() { shared_satb_queue()->filter(); } -void SATBMarkQueueSet::set_closure(ObjectClosure* closure) { - _closure = closure; -} - -void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) { - assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition"); - _par_closures[i] = par_closure; -} - -bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, - uint worker) { +bool SATBMarkQueueSet::apply_closure_to_completed_buffer(SATBBufferClosure* cl) { BufferNode* nd = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); @@ -298,10 +295,20 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, if (_n_completed_buffers == 0) _process_completed = false; } } - ObjectClosure* cl = (par ? _par_closures[worker] : _closure); if (nd != NULL) { void **buf = BufferNode::make_buffer_from_node(nd); - ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz); + // Skip over NULL entries at beginning (e.g. push end) of buffer. + // Filtering can result in non-full completed buffers; see + // should_enqueue_buffer. + assert(_sz % sizeof(void*) == 0, "invariant"); + size_t limit = ObjPtrQueue::byte_index_to_index((int)_sz); + for (size_t i = 0; i < limit; ++i) { + if (buf[i] != NULL) { + // Found the end of the block of NULLs; process the remainder. + cl->do_buffer(buf + i, limit - i); + break; + } + } deallocate_buffer(buf); return true; } else { @@ -309,28 +316,6 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, } } -void SATBMarkQueueSet::iterate_completed_buffers_read_only(ObjectClosure* cl) { - assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); - assert(cl != NULL, "pre-condition"); - - BufferNode* nd = _completed_buffers_head; - while (nd != NULL) { - void** buf = BufferNode::make_buffer_from_node(nd); - ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz); - nd = nd->next(); - } -} - -void SATBMarkQueueSet::iterate_thread_buffers_read_only(ObjectClosure* cl) { - assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); - assert(cl != NULL, "pre-condition"); - - for (JavaThread* t = Threads::first(); t; t = t->next()) { - t->satb_mark_queue().apply_closure(cl); - } - shared_satb_queue()->apply_closure(cl); -} - #ifndef PRODUCT // Helpful for debugging diff --git a/src/share/vm/gc_implementation/g1/satbQueue.hpp b/src/share/vm/gc_implementation/g1/satbQueue.hpp index 36af72e60..594895919 100644 --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,32 +25,30 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP +#include "memory/allocation.hpp" #include "gc_implementation/g1/ptrQueue.hpp" -class ObjectClosure; class JavaThread; class SATBMarkQueueSet; +// Base class for processing the contents of a SATB buffer. +class SATBBufferClosure : public StackObj { +protected: + ~SATBBufferClosure() { } + +public: + // Process the SATB entries in the designated buffer range. + virtual void do_buffer(void** buffer, size_t size) = 0; +}; + // A ptrQueue whose elements are "oops", pointers to object heads. class ObjPtrQueue: public PtrQueue { - friend class Threads; friend class SATBMarkQueueSet; - friend class G1RemarkThreadsClosure; private: // Filter out unwanted entries from the buffer. void filter(); - // Apply the closure to all elements. - void apply_closure(ObjectClosure* cl); - - // Apply the closure to all elements and empty the buffer; - void apply_closure_and_empty(ObjectClosure* cl); - - // Apply the closure to all elements of "buf", down to "index" (inclusive.) - static void apply_closure_to_buffer(ObjectClosure* cl, - void** buf, size_t index, size_t sz); - public: ObjPtrQueue(PtrQueueSet* qset, bool perm = false) : // SATB queues are only active during marking cycles. We create @@ -63,6 +61,10 @@ public: // Process queue entries and free resources. void flush(); + // Apply cl to the active part of the buffer. + // Prerequisite: Must be at a safepoint. + void apply_closure_and_empty(SATBBufferClosure* cl); + // Overrides PtrQueue::should_enqueue_buffer(). See the method's // definition for more information. virtual bool should_enqueue_buffer(); @@ -72,21 +74,11 @@ public: void print(const char* name); static void print(const char* name, void** buf, size_t index, size_t sz); #endif // PRODUCT - - void verify_oops_in_buffer() NOT_DEBUG_RETURN; }; class SATBMarkQueueSet: public PtrQueueSet { - ObjectClosure* _closure; - ObjectClosure** _par_closures; // One per ParGCThread. - ObjPtrQueue _shared_satb_queue; - // Utility function to support sequential and parallel versions. If - // "par" is true, then "worker" is the par thread id; if "false", worker - // is ignored. - bool apply_closure_to_completed_buffer_work(bool par, uint worker); - #ifdef ASSERT void dump_active_states(bool expected_active); void verify_active_states(bool expected_active); @@ -110,32 +102,12 @@ public: // Filter all the currently-active SATB buffers. void filter_thread_buffers(); - // Register "blk" as "the closure" for all queues. Only one such closure - // is allowed. The "apply_closure_to_completed_buffer" method will apply - // this closure to a completed buffer, and "iterate_closure_all_threads" - // applies it to partially-filled buffers (the latter should only be done - // with the world stopped). - void set_closure(ObjectClosure* closure); - // Set the parallel closures: pointer is an array of pointers to - // closures, one for each parallel GC thread. - void set_par_closure(int i, ObjectClosure* closure); - - // If there exists some completed buffer, pop it, then apply the - // registered closure to all its elements, and return true. If no - // completed buffers exist, return false. - bool apply_closure_to_completed_buffer() { - return apply_closure_to_completed_buffer_work(false, 0); - } - // Parallel version of the above. - bool par_apply_closure_to_completed_buffer(uint worker) { - return apply_closure_to_completed_buffer_work(true, worker); - } - - // Apply the given closure on enqueued and currently-active buffers - // respectively. Both methods are read-only, i.e., they do not - // modify any of the buffers. - void iterate_completed_buffers_read_only(ObjectClosure* cl); - void iterate_thread_buffers_read_only(ObjectClosure* cl); + // If there exists some completed buffer, pop and process it, and + // return true. Otherwise return false. Processing a buffer + // consists of applying the closure to the buffer range starting + // with the first non-NULL entry to the end of the buffer; the + // leading entries may be NULL due to filtering. + bool apply_closure_to_completed_buffer(SATBBufferClosure* cl); #ifndef PRODUCT // Helpful for debugging diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp index 80a0f7281..47267f21d 100644 --- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,12 +34,11 @@ #include "gc_implementation/g1/vm_operations_g1.hpp" #include "runtime/interfaceSupport.hpp" -VM_G1CollectForAllocation::VM_G1CollectForAllocation( - unsigned int gc_count_before, - size_t word_size) +VM_G1CollectForAllocation::VM_G1CollectForAllocation(uint gc_count_before, + size_t word_size) : VM_G1OperationWithAllocRequest(gc_count_before, word_size, GCCause::_allocation_failure) { - guarantee(word_size > 0, "an allocation should always be requested"); + guarantee(word_size != 0, "An allocation should always be requested with this operation."); } void VM_G1CollectForAllocation::doit() { @@ -57,12 +56,11 @@ void VM_G1CollectFull::doit() { g1h->do_full_collection(false /* clear_all_soft_refs */); } -VM_G1IncCollectionPause::VM_G1IncCollectionPause( - unsigned int gc_count_before, - size_t word_size, - bool should_initiate_conc_mark, - double target_pause_time_ms, - GCCause::Cause gc_cause) +VM_G1IncCollectionPause::VM_G1IncCollectionPause(uint gc_count_before, + size_t word_size, + bool should_initiate_conc_mark, + double target_pause_time_ms, + GCCause::Cause gc_cause) : VM_G1OperationWithAllocRequest(gc_count_before, word_size, gc_cause), _should_initiate_conc_mark(should_initiate_conc_mark), _target_pause_time_ms(target_pause_time_ms), @@ -75,7 +73,7 @@ VM_G1IncCollectionPause::VM_G1IncCollectionPause( } bool VM_G1IncCollectionPause::doit_prologue() { - bool res = VM_GC_Operation::doit_prologue(); + bool res = VM_G1OperationWithAllocRequest::doit_prologue(); if (!res) { if (_should_initiate_conc_mark) { // The prologue can fail for a couple of reasons. The first is that another GC @@ -92,12 +90,8 @@ bool VM_G1IncCollectionPause::doit_prologue() { void VM_G1IncCollectionPause::doit() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); - assert(!_should_initiate_conc_mark || - ((_gc_cause == GCCause::_gc_locker && GCLockerInvokesConcurrent) || - (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) || - _gc_cause == GCCause::_g1_humongous_allocation || - _gc_cause == GCCause::_update_allocation_context_stats_inc), - "only a GC locker, a System.gc(), stats update or a hum allocation induced GC should start a cycle"); + assert(!_should_initiate_conc_mark || g1h->should_do_concurrent_full_gc(_gc_cause), + "only a GC locker, a System.gc(), stats update, whitebox, or a hum allocation induced GC should start a cycle"); if (_word_size > 0) { // An allocation has been requested. So, try to do that first. @@ -169,7 +163,7 @@ void VM_G1IncCollectionPause::doit() { } void VM_G1IncCollectionPause::doit_epilogue() { - VM_GC_Operation::doit_epilogue(); + VM_G1OperationWithAllocRequest::doit_epilogue(); // If the pause was initiated by a System.gc() and // +ExplicitGCInvokesConcurrent, we have to wait here for the cycle @@ -230,7 +224,6 @@ void VM_CGC_Operation::release_and_notify_pending_list_lock() { } void VM_CGC_Operation::doit() { - gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps); TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty); GCTraceTime t(_printGCMessage, G1Log::fine(), true, G1CollectedHeap::heap()->gc_timer_cm(), G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()); SharedHeap* sh = SharedHeap::heap(); diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp index c8014d415..265eb37d3 100644 --- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,20 +36,17 @@ // - VM_G1CollectForAllocation // - VM_G1IncCollectionPause -class VM_G1OperationWithAllocRequest: public VM_GC_Operation { +class VM_G1OperationWithAllocRequest : public VM_CollectForAllocation { protected: - size_t _word_size; - HeapWord* _result; bool _pause_succeeded; AllocationContext_t _allocation_context; public: - VM_G1OperationWithAllocRequest(unsigned int gc_count_before, - size_t word_size, + VM_G1OperationWithAllocRequest(uint gc_count_before, + size_t word_size, GCCause::Cause gc_cause) - : VM_GC_Operation(gc_count_before, gc_cause), - _word_size(word_size), _result(NULL), _pause_succeeded(false) { } - HeapWord* result() { return _result; } + : VM_CollectForAllocation(word_size, gc_count_before, gc_cause), + _pause_succeeded(false) {} bool pause_succeeded() { return _pause_succeeded; } void set_allocation_context(AllocationContext_t context) { _allocation_context = context; } AllocationContext_t allocation_context() { return _allocation_context; } @@ -57,8 +54,8 @@ public: class VM_G1CollectFull: public VM_GC_Operation { public: - VM_G1CollectFull(unsigned int gc_count_before, - unsigned int full_gc_count_before, + VM_G1CollectFull(uint gc_count_before, + uint full_gc_count_before, GCCause::Cause cause) : VM_GC_Operation(gc_count_before, cause, full_gc_count_before, true) { } virtual VMOp_Type type() const { return VMOp_G1CollectFull; } @@ -70,7 +67,7 @@ public: class VM_G1CollectForAllocation: public VM_G1OperationWithAllocRequest { public: - VM_G1CollectForAllocation(unsigned int gc_count_before, + VM_G1CollectForAllocation(uint gc_count_before, size_t word_size); virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; } virtual void doit(); @@ -84,9 +81,9 @@ private: bool _should_initiate_conc_mark; bool _should_retry_gc; double _target_pause_time_ms; - unsigned int _old_marking_cycles_completed_before; + uint _old_marking_cycles_completed_before; public: - VM_G1IncCollectionPause(unsigned int gc_count_before, + VM_G1IncCollectionPause(uint gc_count_before, size_t word_size, bool should_initiate_conc_mark, double target_pause_time_ms, diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp index 4d407dbfa..8ef1bd2cd 100644 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @@ -622,7 +622,7 @@ void ParNewGenTask::work(uint worker_id) { true, // Process younger gens, if any, // as strong roots. false, // no scope; this is parallel code - SharedHeap::SO_ScavengeCodeCache, + GenCollectedHeap::SO_ScavengeCodeCache, GenCollectedHeap::StrongAndWeakRoots, &par_scan_state.to_space_root_closure(), &par_scan_state.older_gen_closure(), @@ -1197,8 +1197,10 @@ oop ParNewGeneration::copy_to_survivor_space_avoiding_promotion_undo( return real_forwardee(old); } - new_obj = _next_gen->par_promote(par_scan_state->thread_num(), - old, m, sz); + if (!_promotion_failed) { + new_obj = _next_gen->par_promote(par_scan_state->thread_num(), + old, m, sz); + } if (new_obj == NULL) { // promotion failed, forward to self diff --git a/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp b/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp index e417cd495..f4f736a64 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp @@ -66,9 +66,10 @@ void GenerationSizer::initialize_flags() { void GenerationSizer::initialize_size_info() { trace_gen_sizes("ps heap raw"); - const size_t page_sz = os::page_size_for_region(_min_heap_byte_size, - _max_heap_byte_size, - 8); + const size_t max_page_sz = os::page_size_for_region_aligned(_max_heap_byte_size, 8); + const size_t min_pages = 4; // 1 for eden + 1 for each survivor + 1 for old + const size_t min_page_sz = os::page_size_for_region_aligned(_min_heap_byte_size, min_pages); + const size_t page_sz = MIN2(max_page_sz, min_page_sz); // Can a page size be something else than a power of two? assert(is_power_of_2((intptr_t)page_sz), "must be a power of 2"); diff --git a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp index 19a055cc0..1dde10746 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp @@ -55,7 +55,7 @@ ParMarkBitMap::initialize(MemRegion covered_region) const size_t words = bits / BitsPerWord; const size_t raw_bytes = words * sizeof(idx_t); - const size_t page_sz = os::page_size_for_region(raw_bytes, raw_bytes, 10); + const size_t page_sz = os::page_size_for_region_aligned(raw_bytes, 10); const size_t granularity = os::vm_allocation_granularity(); _reserved_byte_size = align_size_up(raw_bytes, MAX2(page_sz, granularity)); diff --git a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp index 7a7805379..f407f20b7 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -261,7 +261,7 @@ HeapWord* ParallelScavengeHeap::mem_allocate( uint loop_count = 0; uint gc_count = 0; - int gclocker_stalled_count = 0; + uint gclocker_stalled_count = 0; while (result == NULL) { // We don't want to have multiple collections for a single filled generation. @@ -521,8 +521,8 @@ void ParallelScavengeHeap::collect(GCCause::Cause cause) { assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock"); - unsigned int gc_count = 0; - unsigned int full_gc_count = 0; + uint gc_count = 0; + uint full_gc_count = 0; { MutexLocker ml(Heap_lock); // This value is guarded by the Heap_lock diff --git a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp index 77ab2cb17..114d39dfe 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp @@ -167,7 +167,6 @@ bool PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) { { HandleMark hm; - gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer->gc_id()); TraceCollectorStats tcs(counters()); diff --git a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp index e97a041b9..8175ded1a 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @@ -401,7 +401,7 @@ PSVirtualSpace* ParallelCompactData::create_vspace(size_t count, size_t element_size) { const size_t raw_bytes = count * element_size; - const size_t page_sz = os::page_size_for_region(raw_bytes, raw_bytes, 10); + const size_t page_sz = os::page_size_for_region_aligned(raw_bytes, 10); const size_t granularity = os::vm_allocation_granularity(); _reserved_byte_size = align_size_up(raw_bytes, MAX2(page_sz, granularity)); @@ -2054,7 +2054,6 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { gc_task_manager()->task_idle_workers(); heap->set_par_threads(gc_task_manager()->active_workers()); - gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id()); TraceCollectorStats tcs(counters()); diff --git a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp index 4d835314c..881f380ce 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp @@ -348,7 +348,7 @@ public: HeapWord* _partial_obj_addr; region_sz_t _partial_obj_size; region_sz_t volatile _dc_and_los; - bool _blocks_filled; + bool volatile _blocks_filled; #ifdef ASSERT size_t _blocks_filled_count; // Number of block table fills. @@ -499,7 +499,9 @@ ParallelCompactData::RegionData::destination_count() const inline bool ParallelCompactData::RegionData::blocks_filled() const { - return _blocks_filled; + bool result = _blocks_filled; + OrderAccess::acquire(); + return result; } #ifdef ASSERT @@ -513,6 +515,7 @@ ParallelCompactData::RegionData::blocks_filled_count() const inline void ParallelCompactData::RegionData::set_blocks_filled() { + OrderAccess::release(); _blocks_filled = true; // Debug builds count the number of times the table was filled. DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count)); diff --git a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp index 7eea946a5..6d728ccf5 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp @@ -329,7 +329,6 @@ bool PSScavenge::invoke_no_policy() { ResourceMark rm; HandleMark hm; - gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); GCTraceTime t1(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id()); TraceCollectorStats tcs(counters()); diff --git a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp index 3a8f347bc..9148bb4ff 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,12 +32,10 @@ #include "utilities/dtrace.hpp" // The following methods are used by the parallel scavenge collector -VM_ParallelGCFailedAllocation::VM_ParallelGCFailedAllocation(size_t size, - unsigned int gc_count) : - VM_GC_Operation(gc_count, GCCause::_allocation_failure), - _size(size), - _result(NULL) -{ +VM_ParallelGCFailedAllocation::VM_ParallelGCFailedAllocation(size_t word_size, + uint gc_count) : + VM_CollectForAllocation(word_size, gc_count, GCCause::_allocation_failure) { + assert(word_size != 0, "An allocation should always be requested with this operation."); } void VM_ParallelGCFailedAllocation::doit() { @@ -47,7 +45,7 @@ void VM_ParallelGCFailedAllocation::doit() { assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "must be a ParallelScavengeHeap"); GCCauseSetter gccs(heap, _gc_cause); - _result = heap->failed_mem_allocate(_size); + _result = heap->failed_mem_allocate(_word_size); if (_result == NULL && GC_locker::is_active_and_needs_gc()) { set_gc_locked(); @@ -55,8 +53,8 @@ void VM_ParallelGCFailedAllocation::doit() { } // Only used for System.gc() calls -VM_ParallelGCSystemGC::VM_ParallelGCSystemGC(unsigned int gc_count, - unsigned int full_gc_count, +VM_ParallelGCSystemGC::VM_ParallelGCSystemGC(uint gc_count, + uint full_gc_count, GCCause::Cause gc_cause) : VM_GC_Operation(gc_count, gc_cause, full_gc_count, true /* full */) { diff --git a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.hpp b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.hpp index 53ac77dc4..7ffe25161 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.hpp +++ b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,26 +29,19 @@ #include "gc_implementation/shared/vmGCOperations.hpp" #include "gc_interface/gcCause.hpp" -class VM_ParallelGCFailedAllocation: public VM_GC_Operation { - private: - size_t _size; - HeapWord* _result; - +class VM_ParallelGCFailedAllocation : public VM_CollectForAllocation { public: - VM_ParallelGCFailedAllocation(size_t size, unsigned int gc_count); + VM_ParallelGCFailedAllocation(size_t word_size, uint gc_count); virtual VMOp_Type type() const { return VMOp_ParallelGCFailedAllocation; } virtual void doit(); - - HeapWord* result() const { return _result; } }; class VM_ParallelGCSystemGC: public VM_GC_Operation { public: - VM_ParallelGCSystemGC(unsigned int gc_count, unsigned int full_gc_count, - GCCause::Cause gc_cause); + VM_ParallelGCSystemGC(uint gc_count, uint full_gc_count, GCCause::Cause gc_cause); virtual VMOp_Type type() const { return VMOp_ParallelGCSystemGC; } virtual void doit(); }; diff --git a/src/share/vm/gc_implementation/shared/ageTable.hpp b/src/share/vm/gc_implementation/shared/ageTable.hpp index 9e2ee9999..44d8e0ace 100644 --- a/src/share/vm/gc_implementation/shared/ageTable.hpp +++ b/src/share/vm/gc_implementation/shared/ageTable.hpp @@ -55,7 +55,10 @@ class ageTable VALUE_OBJ_CLASS_SPEC { // add entry void add(oop p, size_t oop_size) { - uint age = p->age(); + add(p->age(), oop_size); + } + + void add(uint age, size_t oop_size) { assert(age > 0 && age < table_size, "invalid age of object"); sizes[age] += oop_size; } diff --git a/src/share/vm/gc_implementation/shared/gcTraceTime.cpp b/src/share/vm/gc_implementation/shared/gcTraceTime.cpp index 83890611e..fff7eea8c 100644 --- a/src/share/vm/gc_implementation/shared/gcTraceTime.cpp +++ b/src/share/vm/gc_implementation/shared/gcTraceTime.cpp @@ -49,10 +49,8 @@ GCTraceTime::GCTraceTime(const char* title, bool doit, bool print_cr, GCTimer* t } if (_doit) { - if (PrintGCTimeStamps) { - gclog_or_tty->stamp(); - gclog_or_tty->print(": "); - } + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); if (PrintGCID) { gclog_or_tty->print("#%u: ", gc_id.id()); } diff --git a/src/share/vm/gc_implementation/shared/mutableSpace.cpp b/src/share/vm/gc_implementation/shared/mutableSpace.cpp index 17a3ecbc1..27a02b68d 100644 --- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp +++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp @@ -62,9 +62,7 @@ void MutableSpace::numa_setup_pages(MemRegion mr, bool clear_space) { } void MutableSpace::pretouch_pages(MemRegion mr) { - for (volatile char *p = (char*)mr.start(); p < (char*)mr.end(); p += os::vm_page_size()) { - char t = *p; *p = t; - } + os::pretouch_memory((char*)mr.start(), (char*)mr.end()); } void MutableSpace::initialize(MemRegion mr, diff --git a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp index ea1760e53..972099b9c 100644 --- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp @@ -193,10 +193,10 @@ void VM_GenCollectForAllocation::doit() { GenCollectedHeap* gch = GenCollectedHeap::heap(); GCCauseSetter gccs(gch, _gc_cause); - _res = gch->satisfy_failed_allocation(_size, _tlab); - assert(gch->is_in_reserved_or_null(_res), "result not in heap"); + _result = gch->satisfy_failed_allocation(_word_size, _tlab); + assert(gch->is_in_reserved_or_null(_result), "result not in heap"); - if (_res == NULL && GC_locker::is_active_and_needs_gc()) { + if (_result == NULL && GC_locker::is_active_and_needs_gc()) { set_gc_locked(); } } @@ -209,6 +209,18 @@ void VM_GenCollectFull::doit() { gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level); } +VM_CollectForMetadataAllocation::VM_CollectForMetadataAllocation(ClassLoaderData* loader_data, + size_t size, + Metaspace::MetadataType mdtype, + uint gc_count_before, + uint full_gc_count_before, + GCCause::Cause gc_cause) + : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true), + _loader_data(loader_data), _size(size), _mdtype(mdtype), _result(NULL) { + assert(_size != 0, "An allocation should always be requested with this operation."); + AllocTracer::send_allocation_requiring_gc_event(_size * HeapWordSize, GCId::peek()); +} + // Returns true iff concurrent GCs unloads metadata. bool VM_CollectForMetadataAllocation::initiate_concurrent_GC() { #if INCLUDE_ALL_GCS @@ -313,3 +325,11 @@ void VM_CollectForMetadataAllocation::doit() { set_gc_locked(); } } + +VM_CollectForAllocation::VM_CollectForAllocation(size_t word_size, uint gc_count_before, GCCause::Cause cause) + : VM_GC_Operation(gc_count_before, cause), _result(NULL), _word_size(word_size) { + // Only report if operation was really caused by an allocation. + if (_word_size != 0) { + AllocTracer::send_allocation_requiring_gc_event(_word_size * HeapWordSize, GCId::peek()); + } +} diff --git a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp index 8f60dcbdb..b8027a972 100644 --- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_VMGCOPERATIONS_HPP #define SHARE_VM_GC_IMPLEMENTATION_SHARED_VMGCOPERATIONS_HPP +#include "gc_implementation/shared/gcId.hpp" #include "memory/heapInspection.hpp" #include "runtime/handles.hpp" #include "runtime/jniHandles.hpp" @@ -38,11 +39,12 @@ // VM_Operation // VM_GC_Operation // VM_GC_HeapInspection -// VM_GenCollectForAllocation // VM_GenCollectFull // VM_GenCollectFullConcurrent -// VM_ParallelGCFailedAllocation // VM_ParallelGCSystemGC +// VM_CollectForAllocation +// VM_GenCollectForAllocation +// VM_ParallelGCFailedAllocation // VM_GC_Operation // - implements methods common to all classes in the hierarchy: // prevents multiple gc requests and manages lock on heap; @@ -51,6 +53,7 @@ // - prints class histogram on SIGBREAK if PrintClassHistogram // is specified; and also the attach "inspectheap" operation // +// VM_CollectForAllocation // VM_GenCollectForAllocation // VM_ParallelGCFailedAllocation // - this operation is invoked when allocation is failed; @@ -66,13 +69,13 @@ class VM_GC_Operation: public VM_Operation { protected: - BasicLock _pending_list_basic_lock; // for refs pending list notification (PLL) - unsigned int _gc_count_before; // gc count before acquiring PLL - unsigned int _full_gc_count_before; // full gc count before acquiring PLL - bool _full; // whether a "full" collection - bool _prologue_succeeded; // whether doit_prologue succeeded + BasicLock _pending_list_basic_lock; // for refs pending list notification (PLL) + uint _gc_count_before; // gc count before acquiring PLL + uint _full_gc_count_before; // full gc count before acquiring PLL + bool _full; // whether a "full" collection + bool _prologue_succeeded; // whether doit_prologue succeeded GCCause::Cause _gc_cause; // the putative cause for this gc op - bool _gc_locked; // will be set if gc was locked + bool _gc_locked; // will be set if gc was locked virtual bool skip_operation() const; @@ -81,9 +84,9 @@ class VM_GC_Operation: public VM_Operation { void release_and_notify_pending_list_lock(); public: - VM_GC_Operation(unsigned int gc_count_before, + VM_GC_Operation(uint gc_count_before, GCCause::Cause _cause, - unsigned int full_gc_count_before = 0, + uint full_gc_count_before = 0, bool full = false) { _full = full; _prologue_succeeded = false; @@ -160,38 +163,45 @@ class VM_GC_HeapInspection: public VM_GC_Operation { bool collect(); }; +class VM_CollectForAllocation : public VM_GC_Operation { + protected: + size_t _word_size; // Size of object to be allocated (in number of words) + HeapWord* _result; // Allocation result (NULL if allocation failed) + + public: + VM_CollectForAllocation(size_t word_size, uint gc_count_before, GCCause::Cause cause); + + HeapWord* result() const { + return _result; + } +}; -class VM_GenCollectForAllocation: public VM_GC_Operation { +class VM_GenCollectForAllocation : public VM_CollectForAllocation { private: - HeapWord* _res; - size_t _size; // size of object to be allocated. bool _tlab; // alloc is of a tlab. public: - VM_GenCollectForAllocation(size_t size, + VM_GenCollectForAllocation(size_t word_size, bool tlab, - unsigned int gc_count_before) - : VM_GC_Operation(gc_count_before, GCCause::_allocation_failure), - _size(size), + uint gc_count_before) + : VM_CollectForAllocation(word_size, gc_count_before, GCCause::_allocation_failure), _tlab(tlab) { - _res = NULL; + assert(word_size != 0, "An allocation should always be requested with this operation."); } ~VM_GenCollectForAllocation() {} virtual VMOp_Type type() const { return VMOp_GenCollectForAllocation; } virtual void doit(); - HeapWord* result() const { return _res; } }; - // VM operation to invoke a collection of the heap as a // GenCollectedHeap heap. class VM_GenCollectFull: public VM_GC_Operation { private: int _max_level; public: - VM_GenCollectFull(unsigned int gc_count_before, - unsigned int full_gc_count_before, + VM_GenCollectFull(uint gc_count_before, + uint full_gc_count_before, GCCause::Cause gc_cause, - int max_level) + int max_level) : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true /* full */), _max_level(max_level) { } ~VM_GenCollectFull() {} @@ -208,12 +218,9 @@ class VM_CollectForMetadataAllocation: public VM_GC_Operation { public: VM_CollectForMetadataAllocation(ClassLoaderData* loader_data, size_t size, Metaspace::MetadataType mdtype, - unsigned int gc_count_before, - unsigned int full_gc_count_before, - GCCause::Cause gc_cause) - : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true), - _loader_data(loader_data), _size(size), _mdtype(mdtype), _result(NULL) { - } + uint gc_count_before, + uint full_gc_count_before, + GCCause::Cause gc_cause); virtual VMOp_Type type() const { return VMOp_CollectForMetadataAllocation; } virtual void doit(); MetaWord* result() const { return _result; } |