aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/x86
diff options
context:
space:
mode:
author Alexey Ushakov <alexey.v.ushakov@gmail.com> 2020-01-20 12:26:37 +0300
committer GitHub <noreply@github.com> 2020-01-20 12:26:37 +0300
commit 63388d7cef8a6b2d2fd1c64ee4e612e514e21409 (patch)
tree 40924405498e32d269acb4295c9625efb5f471cc /src/cpu/x86
parent 9db779113bfae4bb0853a5d13c6114133ada6683 (diff)
parent 19256756bcd043698ee64bd800f3da39ddbef3a7 (diff)
download jdk8u_hotspot-63388d7cef8a6b2d2fd1c64ee4e612e514e21409.tar.gz
jdk8u242 update
Diffstat (limited to 'src/cpu/x86')
-rw-r--r-- src/cpu/x86/vm/assembler_x86.cpp 9
-rw-r--r-- src/cpu/x86/vm/assembler_x86.hpp 2
-rw-r--r-- src/cpu/x86/vm/stubGenerator_x86_32.cpp 170
-rw-r--r-- src/cpu/x86/vm/stubGenerator_x86_64.cpp 176
-rw-r--r-- src/cpu/x86/vm/stubRoutines_x86.cpp 4
-rw-r--r-- src/cpu/x86/vm/stubRoutines_x86.hpp 7
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.cpp 49
7 files changed, 402 insertions, 15 deletions
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 7cbc47d60..1759ecdfd 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -2575,6 +2575,15 @@ void Assembler::psrldq(XMMRegister dst, int shift) {
emit_int8(shift);
}
+void Assembler::pslldq(XMMRegister dst, int shift) {
+ // Emit PSLLDQ dst, imm8: shift the 128-bit value in dst left by 'shift' bytes. Passing xmm7 as the first operand presumably supplies the /7 opcode extension (Intel SDM encoding 66 0F 73 /7 ib).
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66);
+ emit_int8(0x73);
+ emit_int8((unsigned char)(0xC0 | encode));
+ emit_int8(shift);
+}
+
void Assembler::ptest(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 341d9e39b..5ea01311e 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -1527,6 +1527,8 @@ private:
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
+ // Shift Left by bytes Logical DoubleQuadword Immediate
+ void pslldq(XMMRegister dst, int shift);
// Logical Compare 128bit
void ptest(XMMRegister dst, XMMRegister src);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 50a06d7a5..2e5599807 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -2719,6 +2719,169 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // Emits the 16-byte pshufb mask that exchanges the two 8-byte halves of a 128-bit value (byte order inside each half is kept); returns the mask's address.
+ address generate_ghash_long_swap_mask() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+ address start = __ pc();
+ __ emit_data(0x0b0a0908, relocInfo::none, 0); // result bytes 0-3 <- source bytes 8-11 (assuming little-endian emission)
+ __ emit_data(0x0f0e0d0c, relocInfo::none, 0); // result bytes 4-7 <- source bytes 12-15
+ __ emit_data(0x03020100, relocInfo::none, 0); // result bytes 8-11 <- source bytes 0-3
+ __ emit_data(0x07060504, relocInfo::none, 0); // result bytes 12-15 <- source bytes 4-7
+
+ return start;
+ }
+
+ // Emits the 16-byte pshufb mask that reverses all 16 bytes of a 128-bit value; returns the mask's address.
+ address generate_ghash_byte_swap_mask() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+ address start = __ pc();
+ __ emit_data(0x0c0d0e0f, relocInfo::none, 0); // result bytes 0-3 <- source bytes 15-12 (assuming little-endian emission)
+ __ emit_data(0x08090a0b, relocInfo::none, 0); // result bytes 4-7 <- source bytes 11-8
+ __ emit_data(0x04050607, relocInfo::none, 0); // result bytes 8-11 <- source bytes 7-4
+ __ emit_data(0x00010203, relocInfo::none, 0); // result bytes 12-15 <- source bytes 3-0
+ return start;
+ }
+
+ /* GHASH stub (single or multiple blocks): folds 'blocks' 16-byte blocks read from 'data' into the 16-byte 'state' using the hash subkey 'subkeyH'; the four arguments are read from the stack. Returns the stub entry address. */
+ address generate_ghash_processBlocks() {
+ assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
+ __ align(CodeEntryAlignment);
+ Label L_ghash_loop, L_exit;
+ StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+ address start = __ pc();
+
+ const Register state = rdi;
+ const Register subkeyH = rsi;
+ const Register data = rdx;
+ const Register blocks = rcx;
+
+ const Address state_param(rbp, 8+0); // stack arguments, addressed off rbp after enter(); presumably 8 skips the saved rbp and return address
+ const Address subkeyH_param(rbp, 8+4);
+ const Address data_param(rbp, 8+8);
+ const Address blocks_param(rbp, 8+12);
+
+ const XMMRegister xmm_temp0 = xmm0;
+ const XMMRegister xmm_temp1 = xmm1;
+ const XMMRegister xmm_temp2 = xmm2;
+ const XMMRegister xmm_temp3 = xmm3;
+ const XMMRegister xmm_temp4 = xmm4;
+ const XMMRegister xmm_temp5 = xmm5;
+ const XMMRegister xmm_temp6 = xmm6;
+ const XMMRegister xmm_temp7 = xmm7;
+
+ __ enter();
+ handleSOERegisters(true); // Save registers
+
+ __ movptr(state, state_param);
+ __ movptr(subkeyH, subkeyH_param);
+ __ movptr(data, data_param);
+ __ movptr(blocks, blocks_param);
+
+ __ movdqu(xmm_temp0, Address(state, 0)); // xmm0 = current 16-byte hash state
+ __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+ __ movdqu(xmm_temp1, Address(subkeyH, 0)); // xmm1 = hash subkey, loaded once before the loop
+ __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+ __ BIND(L_ghash_loop);
+ __ movdqu(xmm_temp2, Address(data, 0)); // xmm2 = next 16-byte input block
+ __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+ __ pxor(xmm_temp0, xmm_temp2); // fold the input block into the running state
+
+ //
+ // Multiply with the hash key
+ //
+ __ movdqu(xmm_temp3, xmm_temp0);
+ __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
+ __ movdqu(xmm_temp4, xmm_temp0);
+ __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
+
+ __ movdqu(xmm_temp5, xmm_temp0);
+ __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
+ __ movdqu(xmm_temp6, xmm_temp0);
+ __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
+
+ __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
+
+ __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
+ __ psrldq(xmm_temp4, 8); // shift xmm4 right by 64 bits (8 bytes)
+ __ pslldq(xmm_temp5, 8); // shift xmm5 left by 64 bits (8 bytes)
+ __ pxor(xmm_temp3, xmm_temp5);
+ __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
+ // of the carry-less multiplication of
+ // xmm0 by xmm1.
+
+ // We shift the result of the multiplication by one bit position
+ // to the left to account for the fact that the bits are reversed.
+ __ movdqu(xmm_temp7, xmm_temp3);
+ __ movdqu(xmm_temp4, xmm_temp6);
+ __ pslld (xmm_temp3, 1);
+ __ pslld(xmm_temp6, 1);
+ __ psrld(xmm_temp7, 31); // isolate the bit shifted out of each 32-bit lane of xmm3
+ __ psrld(xmm_temp4, 31); // isolate the bit shifted out of each 32-bit lane of xmm6
+ __ movdqu(xmm_temp5, xmm_temp7);
+ __ pslldq(xmm_temp4, 4); // move each carry bit up to the next 32-bit lane
+ __ pslldq(xmm_temp7, 4);
+ __ psrldq(xmm_temp5, 12); // carry out of the top lane of xmm3 goes into the bottom lane of xmm6
+ __ por(xmm_temp3, xmm_temp7);
+ __ por(xmm_temp6, xmm_temp4);
+ __ por(xmm_temp6, xmm_temp5);
+
+ //
+ // First phase of the reduction
+ //
+ // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
+ // independently.
+ __ movdqu(xmm_temp7, xmm_temp3);
+ __ movdqu(xmm_temp4, xmm_temp3);
+ __ movdqu(xmm_temp5, xmm_temp3);
+ __ pslld(xmm_temp7, 31); // packed 32-bit left shift: << 31
+ __ pslld(xmm_temp4, 30); // packed 32-bit left shift: << 30
+ __ pslld(xmm_temp5, 25); // packed 32-bit left shift: << 25
+ __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions
+ __ pxor(xmm_temp7, xmm_temp5);
+ __ movdqu(xmm_temp4, xmm_temp7);
+ __ pslldq(xmm_temp7, 12);
+ __ psrldq(xmm_temp4, 4); // xmm4 is kept for the second phase below
+ __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
+
+ //
+ // Second phase of the reduction
+ //
+ // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
+ // shift operations.
+ __ movdqu(xmm_temp2, xmm_temp3);
+ __ movdqu(xmm_temp7, xmm_temp3);
+ __ movdqu(xmm_temp5, xmm_temp3);
+ __ psrld(xmm_temp2, 1); // packed 32-bit right shift: >> 1
+ __ psrld(xmm_temp7, 2); // packed 32-bit right shift: >> 2
+ __ psrld(xmm_temp5, 7); // packed 32-bit right shift: >> 7
+ __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions
+ __ pxor(xmm_temp2, xmm_temp5);
+ __ pxor(xmm_temp2, xmm_temp4);
+ __ pxor(xmm_temp3, xmm_temp2);
+ __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
+
+ __ decrement(blocks); // NOTE(review): the count is only tested after a block has been processed, so blocks is presumably >= 1 on entry
+ __ jcc(Assembler::zero, L_exit);
+ __ movdqu(xmm_temp0, xmm_temp6); // the result becomes the state for the next block
+ __ addptr(data, 16); // advance to the next 16-byte block
+ __ jmp(L_ghash_loop);
+
+ __ BIND(L_exit);
+ // Swap the result back with the same mask that was applied to the state on entry
+ __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+ __ movdqu(Address(state, 0), xmm_temp6); // store the result
+
+ handleSOERegisters(false); // restore registers
+ __ leave();
+ __ ret(0);
+ return start;
+ }
+
/**
* Arguments:
*
@@ -3018,6 +3181,13 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}
+ // Generate GHASH intrinsics code
+ if (UseGHASHIntrinsics) {
+ StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+ StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+ }
+
// Safefetch stubs.
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
&StubRoutines::_safefetch32_fault_pc,
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 1d38af799..c5811b28b 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -3639,6 +3639,175 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+
+ // Emits the 16-byte pshufb mask that exchanges the two 8-byte halves of a 128-bit value (byte order inside each half is kept); returns the mask's address.
+ address generate_ghash_long_swap_mask() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+ address start = __ pc();
+ __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); // result bytes 0-7 <- source bytes 8-15 (assuming little-endian emission)
+ __ emit_data64(0x0706050403020100, relocInfo::none ); // result bytes 8-15 <- source bytes 0-7
+ return start;
+ }
+
+ // Emits the 16-byte pshufb mask that reverses all 16 bytes of a 128-bit value; returns the mask's address.
+ address generate_ghash_byte_swap_mask() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+ address start = __ pc();
+ __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); // result bytes 0-7 <- source bytes 15-8 (assuming little-endian emission)
+ __ emit_data64(0x0001020304050607, relocInfo::none ); // result bytes 8-15 <- source bytes 7-0
+ return start;
+ }
+
+ /* GHASH stub (single or multiple blocks): folds 'blocks' 16-byte blocks read from 'data' into the 16-byte 'state' using the hash subkey 'subkeyH'; arguments arrive in c_rarg0..c_rarg3. Returns the stub entry address. */
+ address generate_ghash_processBlocks() {
+ __ align(CodeEntryAlignment);
+ Label L_ghash_loop, L_exit;
+ StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+ address start = __ pc();
+
+ const Register state = c_rarg0;
+ const Register subkeyH = c_rarg1;
+ const Register data = c_rarg2;
+ const Register blocks = c_rarg3;
+
+#ifdef _WIN64
+ const int XMM_REG_LAST = 10;
+#endif
+
+ const XMMRegister xmm_temp0 = xmm0;
+ const XMMRegister xmm_temp1 = xmm1;
+ const XMMRegister xmm_temp2 = xmm2;
+ const XMMRegister xmm_temp3 = xmm3;
+ const XMMRegister xmm_temp4 = xmm4;
+ const XMMRegister xmm_temp5 = xmm5;
+ const XMMRegister xmm_temp6 = xmm6;
+ const XMMRegister xmm_temp7 = xmm7;
+ const XMMRegister xmm_temp8 = xmm8;
+ const XMMRegister xmm_temp9 = xmm9;
+ const XMMRegister xmm_temp10 = xmm10;
+
+ __ enter();
+
+#ifdef _WIN64
+ // save xmm6..xmm10 (the registers in the range used by this stub that must be preserved)
+ __ subptr(rsp, -rsp_after_call_off * wordSize); // NOTE(review): presumably reserves the save area below rbp; rsp is reset by leave() at the end
+ for (int i = 6; i <= XMM_REG_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+#endif
+
+ __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); // xmm10 keeps the long-swap mask for the whole stub
+
+ __ movdqu(xmm_temp0, Address(state, 0)); // xmm0 = current 16-byte hash state
+ __ pshufb(xmm_temp0, xmm_temp10);
+
+
+ __ BIND(L_ghash_loop);
+ __ movdqu(xmm_temp2, Address(data, 0)); // xmm2 = next 16-byte input block
+ __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+ __ movdqu(xmm_temp1, Address(subkeyH, 0)); // xmm1 = hash subkey; reloaded and swapped on every iteration
+ __ pshufb(xmm_temp1, xmm_temp10);
+
+ __ pxor(xmm_temp0, xmm_temp2); // fold the input block into the running state
+
+ //
+ // Multiply with the hash key
+ //
+ __ movdqu(xmm_temp3, xmm_temp0);
+ __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
+ __ movdqu(xmm_temp4, xmm_temp0);
+ __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
+
+ __ movdqu(xmm_temp5, xmm_temp0);
+ __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
+ __ movdqu(xmm_temp6, xmm_temp0);
+ __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
+
+ __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
+
+ __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
+ __ psrldq(xmm_temp4, 8); // shift xmm4 right by 64 bits (8 bytes)
+ __ pslldq(xmm_temp5, 8); // shift xmm5 left by 64 bits (8 bytes)
+ __ pxor(xmm_temp3, xmm_temp5);
+ __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
+ // of the carry-less multiplication of
+ // xmm0 by xmm1.
+
+ // We shift the result of the multiplication by one bit position
+ // to the left to account for the fact that the bits are reversed.
+ __ movdqu(xmm_temp7, xmm_temp3);
+ __ movdqu(xmm_temp8, xmm_temp6);
+ __ pslld(xmm_temp3, 1);
+ __ pslld(xmm_temp6, 1);
+ __ psrld(xmm_temp7, 31); // isolate the bit shifted out of each 32-bit lane of xmm3
+ __ psrld(xmm_temp8, 31); // isolate the bit shifted out of each 32-bit lane of xmm6
+ __ movdqu(xmm_temp9, xmm_temp7);
+ __ pslldq(xmm_temp8, 4); // move each carry bit up to the next 32-bit lane
+ __ pslldq(xmm_temp7, 4);
+ __ psrldq(xmm_temp9, 12); // carry out of the top lane of xmm3 goes into the bottom lane of xmm6
+ __ por(xmm_temp3, xmm_temp7);
+ __ por(xmm_temp6, xmm_temp8);
+ __ por(xmm_temp6, xmm_temp9);
+
+ //
+ // First phase of the reduction
+ //
+ // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
+ // independently.
+ __ movdqu(xmm_temp7, xmm_temp3);
+ __ movdqu(xmm_temp8, xmm_temp3);
+ __ movdqu(xmm_temp9, xmm_temp3);
+ __ pslld(xmm_temp7, 31); // packed 32-bit left shift: << 31
+ __ pslld(xmm_temp8, 30); // packed 32-bit left shift: << 30
+ __ pslld(xmm_temp9, 25); // packed 32-bit left shift: << 25
+ __ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions
+ __ pxor(xmm_temp7, xmm_temp9);
+ __ movdqu(xmm_temp8, xmm_temp7);
+ __ pslldq(xmm_temp7, 12);
+ __ psrldq(xmm_temp8, 4); // xmm8 is kept for the second phase below
+ __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
+
+ //
+ // Second phase of the reduction
+ //
+ // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
+ // shift operations.
+ __ movdqu(xmm_temp2, xmm_temp3);
+ __ movdqu(xmm_temp4, xmm_temp3);
+ __ movdqu(xmm_temp5, xmm_temp3);
+ __ psrld(xmm_temp2, 1); // packed 32-bit right shift: >> 1
+ __ psrld(xmm_temp4, 2); // packed 32-bit right shift: >> 2
+ __ psrld(xmm_temp5, 7); // packed 32-bit right shift: >> 7
+ __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions
+ __ pxor(xmm_temp2, xmm_temp5);
+ __ pxor(xmm_temp2, xmm_temp8);
+ __ pxor(xmm_temp3, xmm_temp2);
+ __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
+
+ __ decrement(blocks); // NOTE(review): the count is only tested after a block has been processed, so blocks is presumably >= 1 on entry
+ __ jcc(Assembler::zero, L_exit);
+ __ movdqu(xmm_temp0, xmm_temp6); // the result becomes the state for the next block
+ __ addptr(data, 16); // advance to the next 16-byte block
+ __ jmp(L_ghash_loop);
+
+ __ BIND(L_exit);
+ __ pshufb(xmm_temp6, xmm_temp10); // Swap the result back with the mask applied to the state on entry
+ __ movdqu(Address(state, 0), xmm_temp6); // store the result
+
+#ifdef _WIN64
+ // restore xmm regs belonging to calling function
+ for (int i = 6; i <= XMM_REG_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
+#endif
+ __ leave();
+ __ ret(0);
+ return start;
+ }
+
/**
* Arguments:
*
@@ -4077,6 +4246,13 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
+ // Generate GHASH intrinsics code
+ if (UseGHASHIntrinsics) {
+ StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+ StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+ }
+
// Safefetch stubs.
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
&StubRoutines::_safefetch32_fault_pc,
diff --git a/src/cpu/x86/vm/stubRoutines_x86.cpp b/src/cpu/x86/vm/stubRoutines_x86.cpp
index 200f2aff8..9b0d8fc75 100644
--- a/src/cpu/x86/vm/stubRoutines_x86.cpp
+++ b/src/cpu/x86/vm/stubRoutines_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,8 @@
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
+address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
{
diff --git a/src/cpu/x86/vm/stubRoutines_x86.hpp b/src/cpu/x86/vm/stubRoutines_x86.hpp
index d8e52ab3b..bb160486c 100644
--- a/src/cpu/x86/vm/stubRoutines_x86.hpp
+++ b/src/cpu/x86/vm/stubRoutines_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,10 +36,15 @@
// masks and table for CRC32
static uint64_t _crc_by128_masks[];
static juint _crc_table[];
+ // swap mask for ghash
+ static address _ghash_long_swap_mask_addr;
+ static address _ghash_byte_swap_mask_addr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
+ static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
+ static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index fd0a68d10..3a4246c68 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -553,12 +553,36 @@ void VM_Version::get_processor_features() {
// Use AES instructions if available.
if (supports_aes()) {
if (FLAG_IS_DEFAULT(UseAES)) {
- UseAES = true;
+ FLAG_SET_DEFAULT(UseAES, true);
}
- } else if (UseAES) {
- if (!FLAG_IS_DEFAULT(UseAES))
+ if (!UseAES) {
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ } else {
+ if (UseSSE > 2) {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+ }
+ } else {
+ // The AES intrinsic stubs require AES instruction support (of course)
+ // but also require sse3 mode or higher for instructions it use.
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+ }
+ } else if (UseAES || UseAESIntrinsics) {
+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
warning("AES instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseAES, false);
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
}
// Use CLMUL instructions if available.
@@ -582,16 +606,15 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
- // The AES intrinsic stubs require AES instruction support (of course)
- // but also require sse3 mode for instructions it use.
- if (UseAES && (UseSSE > 2)) {
- if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
- UseAESIntrinsics = true;
+ // GHASH/GCM intrinsics
+ if (UseCLMUL && (UseSSE > 2)) {
+ if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+ UseGHASHIntrinsics = true;
}
- } else if (UseAESIntrinsics) {
- if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
- warning("AES intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ } else if (UseGHASHIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
+ warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
+ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
}
if (UseSHA) {