aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/x86
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/x86')
-rw-r--r-- src/cpu/x86/vm/assembler_x86.hpp 6
-rw-r--r-- src/cpu/x86/vm/c1_FrameMap_x86.cpp 7
-rw-r--r-- src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 3
-rw-r--r-- src/cpu/x86/vm/c1_Runtime1_x86.cpp 12
-rw-r--r-- src/cpu/x86/vm/frame_x86.cpp 37
-rw-r--r-- src/cpu/x86/vm/frame_x86.hpp 15
-rw-r--r-- src/cpu/x86/vm/frame_x86.inline.hpp 8
-rw-r--r-- src/cpu/x86/vm/globals_x86.hpp 2
-rw-r--r-- src/cpu/x86/vm/macroAssembler_x86.cpp 27
-rw-r--r-- src/cpu/x86/vm/methodHandles_x86.cpp 2
-rw-r--r-- src/cpu/x86/vm/runtime_x86_32.cpp 4
-rw-r--r-- src/cpu/x86/vm/sharedRuntime_x86_64.cpp 12
-rw-r--r-- src/cpu/x86/vm/stubGenerator_x86_32.cpp 3
-rw-r--r-- src/cpu/x86/vm/stubGenerator_x86_64.cpp 6
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.hpp 8
-rw-r--r-- src/cpu/x86/vm/x86.ad 15
-rw-r--r-- src/cpu/x86/vm/x86_32.ad 137
-rw-r--r-- src/cpu/x86/vm/x86_64.ad 478
18 files changed, 448 insertions, 334 deletions
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 2ac9df8c9..fc270dbbd 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -141,8 +141,10 @@ REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
#endif // _LP64
-// JSR 292 fixed register usages:
-REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
+// JSR 292
+// On x86, the SP does not have to be saved when invoking method handle intrinsics
+// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
+REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
// Address is an abstraction used to represent a memory location
// using any of the amd64 addressing modes with one object.
diff --git a/src/cpu/x86/vm/c1_FrameMap_x86.cpp b/src/cpu/x86/vm/c1_FrameMap_x86.cpp
index f75eca728..bec7eed65 100644
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp
@@ -343,14 +343,13 @@ LIR_Opr FrameMap::stack_pointer() {
return FrameMap::rsp_opr;
}
-
// JSR 292
+// On x86, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
- assert(rbp == rbp_mh_SP_save, "must be same register");
- return rbp_opr;
+ return LIR_OprFact::illegalOpr;
}
-
bool FrameMap::validate_frame() {
return true;
}
diff --git a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
index 9dfb27969..6fd6b793a 100644
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
@@ -359,6 +359,9 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
generate_stack_overflow_check(bang_size_in_bytes);
push(rbp);
+ if (PreserveFramePointer) {
+ mov(rbp, rsp);
+ }
#ifdef TIERED
// c2 leaves fpu stack dirty. Clean it on entry
if (UseSSE < 2 ) {
diff --git a/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/src/cpu/x86/vm/c1_Runtime1_x86.cpp
index 76303c114..9810f6e1c 100644
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp
@@ -754,14 +754,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
// WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
// since we do a leave anyway.
- // Pop the return address since we are possibly changing SP (restoring from BP).
+ // Pop the return address.
__ leave();
__ pop(rcx);
-
- // Restore SP from BP if the exception PC is a method handle call site.
- NOT_LP64(__ get_thread(thread);)
- __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
- __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
__ jmp(rcx); // jump to exception handler
break;
default: ShouldNotReachHere();
@@ -832,11 +827,6 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
// the pop is also necessary to simulate the effect of a ret(0)
__ pop(exception_pc);
- // Restore SP from BP if the exception PC is a method handle call site.
- NOT_LP64(__ get_thread(thread);)
- __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
- __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
// continue at exception handler (return address removed)
// note: do *not* remove arguments when unwinding the
// activation since the caller assumes having
diff --git a/src/cpu/x86/vm/frame_x86.cpp b/src/cpu/x86/vm/frame_x86.cpp
index 212cdac3d..1a3364313 100644
--- a/src/cpu/x86/vm/frame_x86.cpp
+++ b/src/cpu/x86/vm/frame_x86.cpp
@@ -216,7 +216,8 @@ bool frame::safe_for_sender(JavaThread *thread) {
if (sender_blob->is_nmethod()) {
nmethod* nm = sender_blob->as_nmethod_or_null();
if (nm != NULL) {
- if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+ nm->method()->is_method_handle_intrinsic()) {
return false;
}
}
@@ -383,10 +384,9 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const {
// frame::verify_deopt_original_pc
//
// Verifies the calculated original PC of a deoptimization PC for the
-// given unextended SP. The unextended SP might also be the saved SP
-// for MethodHandle call sites.
+// given unextended SP.
#ifdef ASSERT
-void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
frame fr;
// This is ugly but it's better than to change {get,set}_original_pc
@@ -396,33 +396,23 @@ void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool
address original_pc = nm->get_original_pc(&fr);
assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
- assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
}
#endif
//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
void frame::adjust_unextended_sp() {
- // If we are returning to a compiled MethodHandle call site, the
- // saved_fp will in fact be a saved value of the unextended SP. The
- // simplest way to tell whether we are returning to such a call site
- // is as follows:
+ // On x86, sites calling method handle intrinsics and lambda forms are treated
+ // as any other call site. Therefore, no special action is needed when we are
+ // returning to any of these call sites.
nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
if (sender_nm != NULL) {
- // If the sender PC is a deoptimization point, get the original
- // PC. For MethodHandle call site the unextended_sp is stored in
- // saved_fp.
- if (sender_nm->is_deopt_mh_entry(_pc)) {
- DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
- _unextended_sp = _fp;
- }
- else if (sender_nm->is_deopt_entry(_pc)) {
+ // If the sender PC is a deoptimization point, get the original PC.
+ if (sender_nm->is_deopt_entry(_pc) ||
+ sender_nm->is_deopt_mh_entry(_pc)) {
DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
}
- else if (sender_nm->is_method_handle_return(_pc)) {
- _unextended_sp = _fp;
- }
}
}
@@ -717,3 +707,10 @@ intptr_t* frame::real_fp() const {
assert(! is_compiled_frame(), "unknown compiled frame size");
return fp();
}
+
+#ifndef PRODUCT
+// This is a generic constructor which is only used by pns() in debug.cpp.
+frame::frame(void* sp, void* fp, void* pc) {
+ init((intptr_t*)sp, (intptr_t*)fp, (address)pc);
+}
+#endif
diff --git a/src/cpu/x86/vm/frame_x86.hpp b/src/cpu/x86/vm/frame_x86.hpp
index 8c6a72c2e..1fd5b9c7a 100644
--- a/src/cpu/x86/vm/frame_x86.hpp
+++ b/src/cpu/x86/vm/frame_x86.hpp
@@ -76,11 +76,11 @@
// [locals and parameters ]
// <- sender sp
-// [1] When the c++ interpreter calls a new method it returns to the frame
+// [1] When the C++ interpreter calls a new method it returns to the frame
// manager which allocates a new frame on the stack. In that case there
// is no real callee of this newly allocated frame. The frame manager is
-// aware of the additional frame(s) and will pop them as nested calls
-// complete. Howevers tTo make it look good in the debugger the frame
+// aware of the additional frame(s) and will pop them as nested calls
+// complete. However, to make it look good in the debugger the frame
// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
// with a fake interpreter_state* parameter to make it easy to debug
// nested calls.
@@ -88,7 +88,7 @@
// Note that contrary to the layout for the assembly interpreter the
// expression stack allocated for the C++ interpreter is full sized.
// However this is not as bad as it seems as the interpreter frame_manager
-// will truncate the unused space on succesive method calls.
+// will truncate the unused space on successive method calls.
//
// ------------------------------ C++ interpreter ----------------------------------------
@@ -172,10 +172,7 @@
#ifdef ASSERT
// Used in frame::sender_for_{interpreter,compiled}_frame
- static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
- static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
- verify_deopt_original_pc(nm, unextended_sp, true);
- }
+ static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
#endif
public:
@@ -187,6 +184,8 @@
frame(intptr_t* sp, intptr_t* fp);
+ void init(intptr_t* sp, intptr_t* fp, address pc);
+
// accessors for the instance variables
// Note: not necessarily the real 'frame pointer' (see real_fp)
intptr_t* fp() const { return _fp; }
diff --git a/src/cpu/x86/vm/frame_x86.inline.hpp b/src/cpu/x86/vm/frame_x86.inline.hpp
index 9b875b834..a2649fdb1 100644
--- a/src/cpu/x86/vm/frame_x86.inline.hpp
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp
@@ -40,7 +40,7 @@ inline frame::frame() {
_deopt_state = unknown;
}
-inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) {
_sp = sp;
_unextended_sp = sp;
_fp = fp;
@@ -58,6 +58,10 @@ inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
}
}
+inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+ init(sp, fp, pc);
+}
+
inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
_sp = sp;
_unextended_sp = unextended_sp;
@@ -89,7 +93,7 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) {
// find_blob call. This is also why we can have no asserts on the validity
// of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
// -> pd_last_frame should use a specialized version of pd_last_frame which could
- // call a specilaized frame constructor instead of this one.
+ // call a specialized frame constructor instead of this one.
// Then we could use the assert below. However this assert is of somewhat dubious
// value.
// assert(_pc != NULL, "no pc?");
diff --git a/src/cpu/x86/vm/globals_x86.hpp b/src/cpu/x86/vm/globals_x86.hpp
index 1401997b3..57adca2ff 100644
--- a/src/cpu/x86/vm/globals_x86.hpp
+++ b/src/cpu/x86/vm/globals_x86.hpp
@@ -82,6 +82,8 @@ define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS
define_pd_global(uintx, TypeProfileLevel, 111);
+define_pd_global(bool, PreserveFramePointer, false);
+
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
\
develop(bool, IEEEPrecision, true, \
diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
index 5857a9350..b3a72ab0e 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -6122,6 +6122,10 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
// We always push rbp, so that on return to interpreter rbp, will be
// restored correctly and we can correct the stack.
push(rbp);
+ // Save caller's stack pointer into RBP if the frame pointer is preserved.
+ if (PreserveFramePointer) {
+ mov(rbp, rsp);
+ }
// Remove word for ebp
framesize -= wordSize;
@@ -6136,6 +6140,13 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
// Save RBP register now.
framesize -= wordSize;
movptr(Address(rsp, framesize), rbp);
+ // Save caller's stack pointer into RBP if the frame pointer is preserved.
+ if (PreserveFramePointer) {
+ movptr(rbp, rsp);
+ if (framesize > 0) {
+ addptr(rbp, framesize);
+ }
+ }
}
if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
@@ -6690,7 +6701,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
// clean upper bits of YMM registers
- vzeroupper();
+ vpxor(vec1, vec1);
// compare wide vectors tail
bind(COMPARE_WIDE_TAIL);
@@ -6705,7 +6716,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
// clean upper bits of YMM registers
- vzeroupper();
+ vpxor(vec1, vec1);
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
jmp(COMPARE_16_CHARS);
@@ -6964,7 +6975,8 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
bind(DONE);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
- vzeroupper();
+ vpxor(vec1, vec1);
+ vpxor(vec2, vec2);
}
}
@@ -7098,7 +7110,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
BIND(L_check_fill_8_bytes);
// clean upper bits of YMM registers
- vzeroupper();
+ movdl(xtmp, value);
+ pshufd(xtmp, xtmp, 0);
} else {
// Fill 32-byte chunks
pshufd(xtmp, xtmp, 0);
@@ -7261,7 +7274,11 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
bind(L_copy_16_chars_exit);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
- vzeroupper();
+ vpxor(tmp2Reg, tmp2Reg);
+ vpxor(tmp3Reg, tmp3Reg);
+ vpxor(tmp4Reg, tmp4Reg);
+ movdl(tmp1Reg, tmp5);
+ pshufd(tmp1Reg, tmp1Reg, 0);
}
subptr(len, 8);
jccb(Assembler::greater, L_copy_8_chars_exit);
diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
index 42c690f5e..2f1326a62 100644
--- a/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp
@@ -373,7 +373,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
// member_reg - MemberName that was the trailing argument
// temp1_recv_klass - klass of stacked receiver, if needed
// rsi/r13 - interpreter linkage (if interpreted)
- // rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
+ // rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
Label L_incompatible_class_change_error;
switch (iid) {
diff --git a/src/cpu/x86/vm/runtime_x86_32.cpp b/src/cpu/x86/vm/runtime_x86_32.cpp
index 1cc10d766..36457cb81 100644
--- a/src/cpu/x86/vm/runtime_x86_32.cpp
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp
@@ -126,10 +126,6 @@ void OptoRuntime::generate_exception_blob() {
// rax: exception handler for given <exception oop/exception pc>
- // Restore SP from BP if the exception PC is a MethodHandle call site.
- __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
- __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
// We have a handler in rax, (could be deopt blob)
// rdx - throwing pc, deopt blob will need it.
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
index be2bfcfa0..86ce38d24 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@@ -4017,8 +4017,8 @@ void OptoRuntime::generate_exception_blob() {
// Save callee-saved registers. See x86_64.ad.
- // rbp is an implicitly saved callee saved register (i.e. the calling
- // convention will save restore it in prolog/epilog) Other than that
+ // rbp is an implicitly saved callee saved register (i.e., the calling
+ // convention will save/restore it in the prolog/epilog). Other than that
// there are no callee save registers now that adapter frames are gone.
__ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
@@ -4060,9 +4060,9 @@ void OptoRuntime::generate_exception_blob() {
// Restore callee-saved registers
- // rbp is an implicitly saved callee saved register (i.e. the calling
+ // rbp is an implicitly saved callee-saved register (i.e., the calling
// convention will save restore it in prolog/epilog) Other than that
- // there are no callee save registers no that adapter frames are gone.
+ // there are no callee save registers now that adapter frames are gone.
__ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
@@ -4071,10 +4071,6 @@ void OptoRuntime::generate_exception_blob() {
// rax: exception handler
- // Restore SP from BP if the exception PC is a MethodHandle call site.
- __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
- __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
// We have a handler in rax (could be deopt blob).
__ mov(r8, rax);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 1622fe5ff..5387ba184 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -837,7 +837,8 @@ class StubGenerator: public StubCodeGenerator {
if (UseUnalignedLoadStores && (UseAVX >= 2)) {
// clean upper bits of YMM registers
- __ vzeroupper();
+ __ vpxor(xmm0, xmm0);
+ __ vpxor(xmm1, xmm1);
}
__ addl(qword_count, 8);
__ jccb(Assembler::zero, L_exit);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 0000146f5..0bb6118d7 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -1328,7 +1328,8 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_end);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
- __ vzeroupper();
+ __ vpxor(xmm0, xmm0);
+ __ vpxor(xmm1, xmm1);
}
} else {
// Copy 32-bytes per iteration
@@ -1405,7 +1406,8 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_end);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
- __ vzeroupper();
+ __ vpxor(xmm0, xmm0);
+ __ vpxor(xmm1, xmm1);
}
} else {
// Copy 32-bytes per iteration
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp
index 1ad94e38b..49c9dba7e 100644
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -570,10 +570,12 @@ public:
static uint cores_per_cpu() {
uint result = 1;
if (is_intel()) {
- if (supports_processor_topology()) {
+ bool supports_topology = supports_processor_topology();
+ if (supports_topology) {
result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
_cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
- } else {
+ }
+ if (!supports_topology || result == 0) {
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
}
} else if (is_amd()) {
@@ -590,7 +592,7 @@ public:
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
}
- return result;
+ return (result == 0 ? 1 : result);
}
static intx prefetch_data_size() {
diff --git a/src/cpu/x86/vm/x86.ad b/src/cpu/x86/vm/x86.ad
index b575a98a8..8d402c1c6 100644
--- a/src/cpu/x86/vm/x86.ad
+++ b/src/cpu/x86/vm/x86.ad
@@ -912,21 +912,6 @@ static inline jdouble replicate8_imm(int con, int width) {
encode %{
- enc_class preserve_SP %{
- debug_only(int off0 = cbuf.insts_size());
- MacroAssembler _masm(&cbuf);
- // RBP is preserved across all calls, even compiled calls.
- // Use it to preserve RSP in places where the callee might change the SP.
- __ movptr(rbp_mh_SP_save, rsp);
- debug_only(int off1 = cbuf.insts_size());
- assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
- %}
-
- enc_class restore_SP %{
- MacroAssembler _masm(&cbuf);
- __ movptr(rsp, rbp_mh_SP_save);
- %}
-
enc_class call_epilog %{
if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find majik cookie on stack
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index 63a7a0663..42aba5b21 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -123,50 +123,94 @@ alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
+// Class for no registers (empty set).
+reg_class no_reg();
+
// Class for all registers
-reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+// Class for all registers (excluding EBP)
+reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
+// Dynamic register class that selects at runtime between register classes
+// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
+// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
+reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
+
// Class for general registers
-reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
-// Class for general registers which may be used for implicit null checks on win95
-// Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+// Class for general registers (excluding EBP).
+// This register class can be used for implicit null checks on win95.
+// It is also safe for use by tailjumps (we don't want to allocate in ebp).
+// Used also if the PreserveFramePointer flag is true.
+reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
+// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
+reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
+
// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);
+
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
-// Class for general registers not including ECX
-reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
-// Class for general registers not including EAX
+
+// Class for general registers excluding ECX
+reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
+// Class for general registers excluding ECX (and EBP)
+reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
+// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
+reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
+
+// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
-// Class for general registers not including EAX or EBX.
-reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
+
+// Class for general registers excluding EAX and EBX.
+reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
+// Class for general registers excluding EAX and EBX (and EBP)
+reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
+// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
+reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
+
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
+
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
+
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
+
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
+
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
+
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
-// Singleton class for interpreter's stack pointer
-reg_class ebp_reg(EBP);
+
// Singleton class for stack pointer
reg_class sp_reg(ESP);
+
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
+
// Class of integer register pairs
-reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
+reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
+// Class of integer register pairs (excluding the EBP,EDI pair)
+reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
+// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
+reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
+
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
+
// Not AX or DX, used in divides
-reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
+reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
+// Not AX or DX (nor EBP), used in divides
+reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
+// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
+reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
@@ -240,18 +284,11 @@ static int pre_call_resets_size() {
return size;
}
-static int preserve_SP_size() {
- return 2; // op, rm(reg/reg)
-}
-
// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
- int offset = 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
- if (_method_handle_invoke)
- offset += preserve_SP_size();
- return offset;
+ return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}
int MachCallDynamicJavaNode::ret_addr_offset() {
@@ -285,15 +322,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
- current_offset += pre_call_resets_size(); // skip fldcw, if any
- current_offset += preserve_SP_size(); // skip mov rbp, rsp
- current_offset += 1; // skip call opcode byte
- return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
current_offset += pre_call_resets_size(); // skip fldcw, if any
current_offset += 5; // skip MOV instruction
@@ -523,6 +551,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("# stack bang (%d bytes)", bangsize);
st->print("\n\t");
st->print("PUSH EBP\t# Save EBP");
+ if (PreserveFramePointer) {
+ st->print("\n\t");
+ st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
+ }
if (framesize) {
st->print("\n\t");
st->print("SUB ESP, #%d\t# Create frame",framesize);
@@ -532,6 +564,14 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("\n\t");
framesize -= wordSize;
st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
+ if (PreserveFramePointer) {
+ st->print("\n\t");
+ st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
+ if (framesize > 0) {
+ st->print("\n\t");
+ st->print("ADD EBP, #%d", framesize);
+ }
+ }
}
if (VerifyStackAtCalls) {
@@ -1488,7 +1528,7 @@ RegMask Matcher::modL_proj_mask() {
}
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
- return EBP_REG_mask();
+ return NO_REG_mask();
}
// Returns true if the high 32 bits of the value is known to be zero.
@@ -3734,7 +3774,7 @@ operand eRegP() %{
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
- constraint(ALLOC_IN_RC(int_reg_no_rbp));
+ constraint(ALLOC_IN_RC(int_reg_no_ebp));
match(RegP);
match(eAXRegP);
match(eBXRegP);
@@ -3823,13 +3863,6 @@ operand eDIRegP(eRegP reg) %{
interface(REG_INTER);
%}
-operand eBPRegP() %{
- constraint(ALLOC_IN_RC(ebp_reg));
- match(RegP);
- format %{ "EBP" %}
- interface(REG_INTER);
-%}
-
operand eRegL() %{
constraint(ALLOC_IN_RC(long_reg));
match(RegL);
@@ -12708,7 +12741,6 @@ instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst,
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
match(CallStaticJava);
- predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
effect(USE meth);
ins_cost(300);
@@ -12722,29 +12754,6 @@ instruct CallStaticJavaDirect(method meth) %{
ins_alignment(4);
%}
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-// compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
- match(CallStaticJava);
- predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
- effect(USE meth);
- // EBP is saved by all callees (for interpreter stack correction).
- // We use it here for a similar purpose, in {preserve,restore}_SP.
-
- ins_cost(300);
- format %{ "CALL,static/MethodHandle " %}
- opcode(0xE8); /* E8 cd */
- ins_encode( pre_call_resets,
- preserve_SP,
- Java_Static_Call( meth ),
- restore_SP,
- call_epilog,
- post_call_FPU );
- ins_pipe( pipe_slow );
- ins_alignment(4);
-%}
-
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index 1233dbf77..52bc63507 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -166,55 +166,67 @@ alloc_class chunk0(R10, R10_H,
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
-// Class for all pointer registers (including RSP)
-reg_class any_reg(RAX, RAX_H,
- RDX, RDX_H,
- RBP, RBP_H,
- RDI, RDI_H,
- RSI, RSI_H,
- RCX, RCX_H,
- RBX, RBX_H,
- RSP, RSP_H,
- R8, R8_H,
- R9, R9_H,
- R10, R10_H,
- R11, R11_H,
- R12, R12_H,
- R13, R13_H,
- R14, R14_H,
- R15, R15_H);
-
-// Class for all pointer registers except RSP
-reg_class ptr_reg(RAX, RAX_H,
- RDX, RDX_H,
- RBP, RBP_H,
- RDI, RDI_H,
- RSI, RSI_H,
- RCX, RCX_H,
- RBX, RBX_H,
- R8, R8_H,
- R9, R9_H,
- R10, R10_H,
- R11, R11_H,
- R13, R13_H,
- R14, R14_H);
-
-// Class for all pointer registers except RAX and RSP
-reg_class ptr_no_rax_reg(RDX, RDX_H,
- RBP, RBP_H,
+// Empty register class.
+reg_class no_reg();
+
+// Class for all pointer registers (including RSP and RBP)
+reg_class any_reg_with_rbp(RAX, RAX_H,
+ RDX, RDX_H,
+ RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ RSP, RSP_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R12, R12_H,
+ R13, R13_H,
+ R14, R14_H,
+ R15, R15_H);
+
+// Class for all pointer registers (including RSP, but excluding RBP)
+reg_class any_reg_no_rbp(RAX, RAX_H,
+ RDX, RDX_H,
RDI, RDI_H,
RSI, RSI_H,
RCX, RCX_H,
RBX, RBX_H,
+ RSP, RSP_H,
R8, R8_H,
R9, R9_H,
R10, R10_H,
R11, R11_H,
+ R12, R12_H,
R13, R13_H,
- R14, R14_H);
-
-reg_class ptr_no_rbp_reg(RDX, RDX_H,
- RAX, RAX_H,
+ R14, R14_H,
+ R15, R15_H);
+
+// Dynamic register class that selects at runtime between register classes
+// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer).
+// Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
+reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RSP)
+reg_class ptr_reg_with_rbp(RAX, RAX_H,
+ RDX, RDX_H,
+ RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Class for all pointer registers (excluding RSP and RBP)
+reg_class ptr_reg_no_rbp(RAX, RAX_H,
+ RDX, RDX_H,
RDI, RDI_H,
RSI, RSI_H,
RCX, RCX_H,
@@ -226,18 +238,66 @@ reg_class ptr_no_rbp_reg(RDX, RDX_H,
R13, R13_H,
R14, R14_H);
-// Class for all pointer registers except RAX, RBX and RSP
-reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
- RBP, RBP_H,
- RDI, RDI_H,
- RSI, RSI_H,
- RCX, RCX_H,
- R8, R8_H,
- R9, R9_H,
- R10, R10_H,
- R11, R11_H,
- R13, R13_H,
- R14, R14_H);
+// Dynamic register class: ptr_reg_no_rbp if PreserveFramePointer is set, otherwise ptr_reg_with_rbp.
+reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX and RSP)
+reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
+ RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RSP, and RBP)
+reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Dynamic register class: ptr_no_rax_reg_no_rbp if PreserveFramePointer is set, otherwise ptr_no_rax_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX, RBX, and RSP)
+reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
+ RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
+reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Dynamic register class: ptr_no_rax_rbx_reg_no_rbp if PreserveFramePointer is set, otherwise ptr_no_rax_rbx_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);
@@ -251,59 +311,29 @@ reg_class ptr_rsi_reg(RSI, RSI_H);
// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);
-// Singleton class for RBP pointer register
-reg_class ptr_rbp_reg(RBP, RBP_H);
-
// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);
// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);
-// Class for all long registers (except RSP)
-reg_class long_reg(RAX, RAX_H,
- RDX, RDX_H,
- RBP, RBP_H,
- RDI, RDI_H,
- RSI, RSI_H,
- RCX, RCX_H,
- RBX, RBX_H,
- R8, R8_H,
- R9, R9_H,
- R10, R10_H,
- R11, R11_H,
- R13, R13_H,
- R14, R14_H);
-
-// Class for all long registers except RAX, RDX (and RSP)
-reg_class long_no_rax_rdx_reg(RBP, RBP_H,
- RDI, RDI_H,
- RSI, RSI_H,
- RCX, RCX_H,
- RBX, RBX_H,
- R8, R8_H,
- R9, R9_H,
- R10, R10_H,
- R11, R11_H,
- R13, R13_H,
- R14, R14_H);
-
-// Class for all long registers except RCX (and RSP)
-reg_class long_no_rcx_reg(RBP, RBP_H,
- RDI, RDI_H,
- RSI, RSI_H,
- RAX, RAX_H,
- RDX, RDX_H,
- RBX, RBX_H,
- R8, R8_H,
- R9, R9_H,
- R10, R10_H,
- R11, R11_H,
- R13, R13_H,
- R14, R14_H);
-
-// Class for all long registers except RAX (and RSP)
-reg_class long_no_rax_reg(RBP, RBP_H,
+// Class for all long registers (excluding RSP)
+reg_class long_reg_with_rbp(RAX, RAX_H,
+ RDX, RDX_H,
+ RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Class for all long registers (excluding RSP and RBP)
+reg_class long_reg_no_rbp(RAX, RAX_H,
RDX, RDX_H,
RDI, RDI_H,
RSI, RSI_H,
@@ -316,6 +346,67 @@ reg_class long_no_rax_reg(RBP, RBP_H,
R13, R13_H,
R14, R14_H);
+// Dynamic register class: long_reg_no_rbp if PreserveFramePointer is set, otherwise long_reg_with_rbp.
+reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RAX, RDX and RSP)
+reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Class for all long registers (excluding RAX, RDX, RSP, and RBP)
+reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
+ RSI, RSI_H,
+ RCX, RCX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Dynamic register class: long_no_rax_rdx_reg_no_rbp if PreserveFramePointer is set, otherwise long_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RCX and RSP)
+reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
+ RDI, RDI_H,
+ RSI, RSI_H,
+ RAX, RAX_H,
+ RDX, RDX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Class for all long registers (excluding RCX, RSP, and RBP)
+reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
+ RSI, RSI_H,
+ RAX, RAX_H,
+ RDX, RDX_H,
+ RBX, RBX_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R13, R13_H,
+ R14, R14_H);
+
+// Dynamic register class: long_no_rcx_reg_no_rbp if PreserveFramePointer is set, otherwise long_no_rcx_reg_with_rbp.
+reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);
@@ -325,27 +416,27 @@ reg_class long_rcx_reg(RCX, RCX_H);
// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);
-// Class for all int registers (except RSP)
-reg_class int_reg(RAX,
- RDX,
- RBP,
- RDI,
- RSI,
- RCX,
- RBX,
- R8,
- R9,
- R10,
- R11,
- R13,
- R14);
-
-// Class for all int registers except RCX (and RSP)
-reg_class int_no_rcx_reg(RAX,
+// Class for all int registers (excluding RSP)
+reg_class int_reg_with_rbp(RAX,
+ RDX,
+ RBP,
+ RDI,
+ RSI,
+ RCX,
+ RBX,
+ R8,
+ R9,
+ R10,
+ R11,
+ R13,
+ R14);
+
+// Class for all int registers (excluding RSP and RBP)
+reg_class int_reg_no_rbp(RAX,
RDX,
- RBP,
RDI,
RSI,
+ RCX,
RBX,
R8,
R9,
@@ -354,18 +445,66 @@ reg_class int_no_rcx_reg(RAX,
R13,
R14);
-// Class for all int registers except RAX, RDX (and RSP)
-reg_class int_no_rax_rdx_reg(RBP,
- RDI,
- RSI,
- RCX,
- RBX,
- R8,
- R9,
- R10,
- R11,
- R13,
- R14);
+// Dynamic register class: int_reg_no_rbp if PreserveFramePointer is set, otherwise int_reg_with_rbp.
+reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RCX and RSP)
+reg_class int_no_rcx_reg_with_rbp(RAX,
+ RDX,
+ RBP,
+ RDI,
+ RSI,
+ RBX,
+ R8,
+ R9,
+ R10,
+ R11,
+ R13,
+ R14);
+
+// Class for all int registers (excluding RCX, RSP, and RBP)
+reg_class int_no_rcx_reg_no_rbp(RAX,
+ RDX,
+ RDI,
+ RSI,
+ RBX,
+ R8,
+ R9,
+ R10,
+ R11,
+ R13,
+ R14);
+
+// Dynamic register class: int_no_rcx_reg_no_rbp if PreserveFramePointer is set, otherwise int_no_rcx_reg_with_rbp.
+reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RAX, RDX, and RSP)
+reg_class int_no_rax_rdx_reg_with_rbp(RBP,
+ RDI,
+ RSI,
+ RCX,
+ RBX,
+ R8,
+ R9,
+ R10,
+ R11,
+ R13,
+ R14);
+
+// Class for all int registers (excluding RAX, RDX, RSP, and RBP)
+reg_class int_no_rax_rdx_reg_no_rbp(RDI,
+ RSI,
+ RCX,
+ RBX,
+ R8,
+ R9,
+ R10,
+ R11,
+ R13,
+ R14);
+
+// Dynamic register class: int_no_rax_rdx_reg_no_rbp if PreserveFramePointer is set, otherwise int_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
// Singleton class for RAX int register
reg_class int_rax_reg(RAX);
@@ -396,9 +535,6 @@ source %{
#define __ _masm.
-static int preserve_SP_size() {
- return 3; // rex.w, op, rm(reg/reg)
-}
static int clear_avx_size() {
return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper
}
@@ -409,9 +545,7 @@ static int clear_avx_size() {
int MachCallStaticJavaNode::ret_addr_offset()
{
int offset = 5; // 5 bytes from start of call to where return address points
- offset += clear_avx_size();
- if (_method_handle_invoke)
- offset += preserve_SP_size();
+ offset += clear_avx_size();
return offset;
}
@@ -450,16 +584,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const
-{
- current_offset += preserve_SP_size(); // skip mov rbp, rsp
- current_offset += clear_avx_size(); // skip vzeroupper
- current_offset += 1; // skip call opcode byte
- return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
{
current_offset += clear_avx_size(); // skip vzeroupper
@@ -724,6 +848,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("# stack bang (%d bytes)", bangsize);
st->print("\n\t");
st->print("pushq rbp\t# Save rbp");
+ if (PreserveFramePointer) {
+ st->print("\n\t");
+ st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
+ }
if (framesize) {
st->print("\n\t");
st->print("subq rsp, #%d\t# Create frame",framesize);
@@ -732,7 +860,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("subq rsp, #%d\t# Create frame",framesize);
st->print("\n\t");
framesize -= wordSize;
- st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
+ st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
+ if (PreserveFramePointer) {
+ st->print("\n\t");
+ st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
+ if (framesize > 0) {
+ st->print("\n\t");
+ st->print("addq rbp, #%d", framesize);
+ }
+ }
}
if (VerifyStackAtCalls) {
@@ -1598,8 +1734,9 @@ RegMask Matcher::modL_proj_mask() {
return LONG_RDX_REG_mask();
}
+// Register into which SP is saved on method handle invokes. Not used on x86_64.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
- return PTR_RBP_REG_mask();
+ return NO_REG_mask();
}
%}
@@ -3202,7 +3339,7 @@ operand no_rax_rdx_RegI()
// Pointer Register
operand any_RegP()
%{
- constraint(ALLOC_IN_RC(any_reg));
+ constraint(ALLOC_IN_RC(any_reg));
match(RegP);
match(rax_RegP);
match(rbx_RegP);
@@ -3224,8 +3361,8 @@ operand rRegP()
match(rbx_RegP);
match(rdi_RegP);
match(rsi_RegP);
- match(rbp_RegP);
- match(r15_RegP); // See Q&A below about r15_RegP.
+ match(rbp_RegP); // See Q&A below about
+ match(r15_RegP); // r15_RegP and rbp_RegP.
format %{ %}
interface(REG_INTER);
@@ -3241,11 +3378,14 @@ operand rRegN() %{
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
-// It's fine for an instruction input which expects rRegP to match a r15_RegP.
+// It's fine for an instruction input that expects rRegP to match a r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as an input.
+// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
+// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
+// result, RBP is not included in the output of the instruction either.
operand no_rax_RegP()
%{
@@ -3259,9 +3399,11 @@ operand no_rax_RegP()
interface(REG_INTER);
%}
+// This operand is not allowed to use RBP even if
+// RBP is not used to hold the frame pointer.
operand no_rbp_RegP()
%{
- constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
+ constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
match(RegP);
match(rbx_RegP);
match(rsi_RegP);
@@ -3338,16 +3480,6 @@ operand rdi_RegP()
interface(REG_INTER);
%}
-operand rbp_RegP()
-%{
- constraint(ALLOC_IN_RC(ptr_rbp_reg));
- match(RegP);
- match(rRegP);
-
- format %{ %}
- interface(REG_INTER);
-%}
-
operand r15_RegP()
%{
constraint(ALLOC_IN_RC(ptr_r15_reg));
@@ -11414,7 +11546,6 @@ instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
match(CallStaticJava);
- predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
effect(USE meth);
ins_cost(300);
@@ -11425,27 +11556,6 @@ instruct CallStaticJavaDirect(method meth) %{
ins_alignment(4);
%}
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-// compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
- match(CallStaticJava);
- predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
- effect(USE meth);
- // RBP is saved by all callees (for interpreter stack correction).
- // We use it here for a similar purpose, in {preserve,restore}_SP.
-
- ins_cost(300);
- format %{ "call,static/MethodHandle " %}
- opcode(0xE8); /* E8 cd */
- ins_encode(clear_avx, preserve_SP,
- Java_Static_Call(meth),
- restore_SP,
- call_epilog);
- ins_pipe(pipe_slow);
- ins_alignment(4);
-%}
-
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.