Merge pull request #6 from bell-sw/update_8u242 jb8u242-b1644.3 jb8u242-b1644 jb8u242-b1641 jb8u242-b1640 jb8u232-b1644.2 jetbrains-master-mirror

jdk8u242 update
author: Alexey Ushakov <alexey.v.ushakov@gmail.com> 2020-01-20 12:26:37 +0300
committer: GitHub <noreply@github.com> 2020-01-20 12:26:37 +0300
commit: 63388d7cef8a6b2d2fd1c64ee4e612e514e21409 (patch)
tree: 40924405498e32d269acb4295c9625efb5f471cc /src/cpu/x86
parent: 9db779113bfae4bb0853a5d13c6114133ada6683 (diff)
parent: 19256756bcd043698ee64bd800f3da39ddbef3a7 (diff)
download: jdk8u_hotspot-63388d7cef8a6b2d2fd1c64ee4e612e514e21409.tar.gz
7 files changed, 402 insertions, 15 deletions
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 7cbc47d60..1759ecdfd 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -2575,6 +2575,15 @@ void Assembler::psrldq(XMMRegister dst, int shift) {
   emit_int8(shift);
 }
 
+void Assembler::pslldq(XMMRegister dst, int shift) {
+  // PSLLDQ dst, imm8 (66 0F 73 /7 ib in the legacy SSE2 form): shift the whole 128-bit value in dst left by 'shift' bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));  // SSE2 check is only compiled into non-LP64 (32-bit) builds
+  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66);  // xmm7 is not an operand: its number supplies the /7 opcode extension
+  emit_int8(0x73);                                    // opcode byte
+  emit_int8((unsigned char)(0xC0 | encode));          // ModRM byte, register-direct form (mod = 11b)
+  emit_int8(shift);                                   // imm8: shift amount in bytes
+}
+
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 341d9e39b..5ea01311e 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -1527,6 +1527,8 @@ private:
 
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
+  // Shift Left by bytes Logical DoubleQuadword Immediate
+  void pslldq(XMMRegister dst, int shift);
 
   // Logical Compare 128bit
   void ptest(XMMRegister dst, XMMRegister src);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 50a06d7a5..2e5599807 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -2719,6 +2719,169 @@ class StubGenerator: public StubCodeGenerator {
     return start;
   }
 
+  // pshufb control mask that exchanges the two 64-bit halves of an XMM register (byte order inside each half is kept).
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();                       // address of the 16 mask bytes emitted below; this is the return value
+    __ emit_data(0x0b0a0908, relocInfo::none, 0);  // result bytes  0..3  <- source bytes  8..11
+    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);  // result bytes  4..7  <- source bytes 12..15
+    __ emit_data(0x03020100, relocInfo::none, 0);  // result bytes  8..11 <- source bytes  0..3
+    __ emit_data(0x07060504, relocInfo::none, 0);  // result bytes 12..15 <- source bytes  4..7
+
+  return start;
+  }
+
+  // pshufb control mask that reverses the order of all 16 bytes of an XMM register.
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();                       // address of the 16 mask bytes emitted below; this is the return value
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);  // result bytes  0..3  <- source bytes 15,14,13,12
+    __ emit_data(0x08090a0b, relocInfo::none, 0);  // result bytes  4..7  <- source bytes 11,10, 9, 8
+    __ emit_data(0x04050607, relocInfo::none, 0);  // result bytes  8..11 <- source bytes  7, 6, 5, 4
+    __ emit_data(0x00010203, relocInfo::none, 0);  // result bytes 12..15 <- source bytes  3, 2, 1, 0
+  return start;
+  }
+
+  /* GHASH stub (32-bit): folds 'blocks' 16-byte blocks read from 'data' into the 16-byte 'state' using the hash subkey 'subkeyH' */
+  address generate_ghash_processBlocks() {
+    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = rdi;
+    const Register subkeyH      = rsi;
+    const Register data         = rdx;
+    const Register blocks       = rcx;
+
+    const Address  state_param(rbp, 8+0);    // the four arguments are read from the stack, relative to rbp (set up by enter() below)
+    const Address  subkeyH_param(rbp, 8+4);
+    const Address  data_param(rbp, 8+8);
+    const Address  blocks_param(rbp, 8+12);
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+
+    __ enter();
+    handleSOERegisters(true);  // Save registers
+
+    __ movptr(state, state_param);
+    __ movptr(subkeyH, subkeyH_param);
+    __ movptr(data, data_param);
+    __ movptr(blocks, blocks_param);
+
+    __ movdqu(xmm_temp0, Address(state, 0));    // xmm0 = current hash state, then its 64-bit halves are swapped
+    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ movdqu(xmm_temp1, Address(subkeyH, 0));  // xmm1 = hash subkey, loaded once before the loop, halves swapped
+    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));     // xmm2 = next 16-byte input block, then all 16 bytes are reversed
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ pxor(xmm_temp0, xmm_temp2);              // state ^= block
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 8 bytes (64 bits)
+    __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 8 bytes (64 bits)
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                        // of the carry-less multiplication of
+                                        // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to account for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp6);
+    __ pslld (xmm_temp3, 1);    // each dword << 1; the bit shifted out of every dword is recovered below
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);    // keep only the top bit of each dword (the carry)
+    __ psrld(xmm_temp4, 31);
+    __ movdqu(xmm_temp5, xmm_temp7);
+    __ pslldq(xmm_temp4, 4);    // move each carry up into the next dword
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp5, 12);   // carry out of xmm3's top dword, moved to the bottom dword for xmm6
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp4);
+    __ por(xmm_temp6, xmm_temp5);
+
+    //
+    // First phase of the reduction
+    //
+    // Copy xmm3 into xmm7, xmm4, xmm5 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed left shift of each dword: << 31
+    __ pslld(xmm_temp4, 30);    // packed left shift of each dword: << 30
+    __ pslld(xmm_temp5, 25);    // packed left shift of each dword: << 25
+    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp5);
+    __ movdqu(xmm_temp4, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp4, 4);            // xmm4 is kept for the second phase
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm7, xmm5 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed right shift of each dword: >> 1
+    __ psrld(xmm_temp7, 2);     // packed right shift of each dword: >> 2
+    __ psrld(xmm_temp5, 7);     // packed right shift of each dword: >> 7
+    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp4);      // fold in the value saved by the first phase
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);               // NOTE(review): tested only after the first block, so blocks == 0 is not handled - confirm callers never pass 0
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);    // the result becomes the state for the next block
+    __ addptr(data, 16);                // advance to the next 16-byte block
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+    // Swap the 64-bit halves of the result back (long swap mask) before storing
+    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+    handleSOERegisters(false);  // restore registers
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
@@ -3018,6 +3181,13 @@ class StubGenerator: public StubCodeGenerator {
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                    &StubRoutines::_safefetch32_fault_pc,
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 1d38af799..c5811b28b 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -3639,6 +3639,175 @@ class StubGenerator: public StubCodeGenerator {
     return start;
   }
 
+
+  // pshufb control mask that exchanges the two 64-bit halves of an XMM register (byte order inside each half is kept).
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();                               // address of the 16 mask bytes emitted below; this is the return value
+    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );  // result bytes 0..7  <- source bytes 8..15
+    __ emit_data64(0x0706050403020100, relocInfo::none );  // result bytes 8..15 <- source bytes 0..7
+  return start;
+  }
+
+  // pshufb control mask that reverses the order of all 16 bytes of an XMM register.
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();                               // address of the 16 mask bytes emitted below; this is the return value
+    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );  // result bytes 0..7  <- source bytes 15 down to 8
+    __ emit_data64(0x0001020304050607, relocInfo::none );  // result bytes 8..15 <- source bytes 7 down to 0
+  return start;
+  }
+
+  /* GHASH stub (64-bit): folds 'blocks' 16-byte blocks read from 'data' into the 16-byte 'state' using the hash subkey 'subkeyH' */
+  address generate_ghash_processBlocks() {
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = c_rarg0;
+    const Register subkeyH      = c_rarg1;
+    const Register data         = c_rarg2;
+    const Register blocks       = c_rarg3;
+
+#ifdef _WIN64
+    const int XMM_REG_LAST  = 10;
+#endif
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+    const XMMRegister xmm_temp8 = xmm8;
+    const XMMRegister xmm_temp9 = xmm9;
+    const XMMRegister xmm_temp10 = xmm10;
+
+    __ enter();
+
+#ifdef _WIN64
+    // save xmm6..xmm10 (up to XMM_REG_LAST), which this stub overwrites and must preserve
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+
+    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));  // xmm10 = long swap mask, kept for the whole stub
+
+    __ movdqu(xmm_temp0, Address(state, 0));    // xmm0 = current hash state
+    __ pshufb(xmm_temp0, xmm_temp10);           // swap its 64-bit halves
+
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));     // xmm2 = next 16-byte input block, then all 16 bytes are reversed
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ movdqu(xmm_temp1, Address(subkeyH, 0));  // NOTE(review): subkeyH is loop-invariant but is reloaded and re-shuffled on every iteration
+    __ pshufb(xmm_temp1, xmm_temp10);
+
+    __ pxor(xmm_temp0, xmm_temp2);              // state ^= block
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 8 bytes (64 bits)
+    __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 8 bytes (64 bits)
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                        // of the carry-less multiplication of
+                                        // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to account for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp8, xmm_temp6);
+    __ pslld(xmm_temp3, 1);     // each dword << 1; the bit shifted out of every dword is recovered below
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);    // keep only the top bit of each dword (the carry)
+    __ psrld(xmm_temp8, 31);
+    __ movdqu(xmm_temp9, xmm_temp7);
+    __ pslldq(xmm_temp8, 4);    // move each carry up into the next dword
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp9, 12);   // carry out of xmm3's top dword, moved to the bottom dword for xmm6
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp8);
+    __ por(xmm_temp6, xmm_temp9);
+
+    //
+    // First phase of the reduction
+    //
+    // Copy xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp8, xmm_temp3);
+    __ movdqu(xmm_temp9, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed left shift of each dword: << 31
+    __ pslld(xmm_temp8, 30);    // packed left shift of each dword: << 30
+    __ pslld(xmm_temp9, 25);    // packed left shift of each dword: << 25
+    __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp9);
+    __ movdqu(xmm_temp8, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp8, 4);            // xmm8 is kept for the second phase
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed right shift of each dword: >> 1
+    __ psrld(xmm_temp4, 2);     // packed right shift of each dword: >> 2
+    __ psrld(xmm_temp5, 7);     // packed right shift of each dword: >> 7
+    __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp8);      // fold in the value saved by the first phase
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);               // NOTE(review): tested only after the first block, so blocks == 0 is not handled - confirm callers never pass 0
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);    // the result becomes the state for the next block
+    __ addptr(data, 16);                // advance to the next 16-byte block
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+    __ pshufb(xmm_temp6, xmm_temp10);          // Swap the 64-bit halves of the result back (long swap mask)
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
@@ -4077,6 +4246,13 @@ class StubGenerator: public StubCodeGenerator {
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
diff --git a/src/cpu/x86/vm/stubRoutines_x86.cpp b/src/cpu/x86/vm/stubRoutines_x86.cpp
index 200f2aff8..9b0d8fc75 100644
--- a/src/cpu/x86/vm/stubRoutines_x86.cpp
+++ b/src/cpu/x86/vm/stubRoutines_x86.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,8 @@
 
 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
+address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
 
 uint64_t StubRoutines::x86::_crc_by128_masks[] =
 {
diff --git a/src/cpu/x86/vm/stubRoutines_x86.hpp b/src/cpu/x86/vm/stubRoutines_x86.hpp
index d8e52ab3b..bb160486c 100644
--- a/src/cpu/x86/vm/stubRoutines_x86.hpp
+++ b/src/cpu/x86/vm/stubRoutines_x86.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,10 +36,15 @@
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint    _crc_table[];
+  // swap mask for ghash
+  static address _ghash_long_swap_mask_addr;
+  static address _ghash_byte_swap_mask_addr;
 
  public:
   static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
   static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
   static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
+  static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
+  static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
 
 #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index fd0a68d10..3a4246c68 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -553,12 +553,36 @@ void VM_Version::get_processor_features() {
   // Use AES instructions if available.
   if (supports_aes()) {
     if (FLAG_IS_DEFAULT(UseAES)) {
-      UseAES = true;
+      FLAG_SET_DEFAULT(UseAES, true);
     }
-  } else if (UseAES) {
-    if (!FLAG_IS_DEFAULT(UseAES))
+    if (!UseAES) {
+      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
+      }
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    } else {
+      if (UseSSE > 2) {
+        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+        }
+      } else {
+        // The AES intrinsic stubs require AES instruction support (of course)
+        // but also require sse3 mode or higher for instructions it use.
+        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
+        }
+        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+      }
+    }
+  } else if (UseAES || UseAESIntrinsics) {
+    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
       warning("AES instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAES, false);
+      FLAG_SET_DEFAULT(UseAES, false);
+    }
+    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      warning("AES intrinsics are not available on this CPU");
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    }
   }
 
   // Use CLMUL instructions if available.
@@ -582,16 +606,15 @@ void VM_Version::get_processor_features() {
     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
   }
 
-  // The AES intrinsic stubs require AES instruction support (of course)
-  // but also require sse3 mode for instructions it use.
-  if (UseAES && (UseSSE > 2)) {
-    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-      UseAESIntrinsics = true;
+  // GHASH/GCM intrinsics
+  if (UseCLMUL && (UseSSE > 2)) {
+    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+      UseGHASHIntrinsics = true;
     }
-  } else if (UseAESIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
-      warning("AES intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+  } else if (UseGHASHIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
+      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   }
 
   if (UseSHA) {
author	Alexey Ushakov <alexey.v.ushakov@gmail.com>	2020-01-20 12:26:37 +0300
committer	GitHub <noreply@github.com>	2020-01-20 12:26:37 +0300
commit	63388d7cef8a6b2d2fd1c64ee4e612e514e21409 (patch)
tree	40924405498e32d269acb4295c9625efb5f471cc /src/cpu/x86
parent	9db779113bfae4bb0853a5d13c6114133ada6683 (diff)
parent	19256756bcd043698ee64bd800f3da39ddbef3a7 (diff)
download	jdk8u_hotspot-63388d7cef8a6b2d2fd1c64ee4e612e514e21409.tar.gz