author     Peter Johnson <peter@tortall.net>  2011-07-03 14:36:37 -0700
committer  Peter Johnson <peter@tortall.net>  2011-07-03 14:36:37 -0700
commit     d779fcb04e7b47b6054483a498ec3ad77428bb24 (patch)
tree       eabe708b9347a6354018ee4f0aacc194795bb32c /modules
parent     fc7724a3df1dd3b65317f66547573939a1f269e6 (diff)
download   yasm-d779fcb04e7b47b6054483a498ec3ad77428bb24.tar.gz
Add most Intel AVX2 instructions.
Reference: Intel AVX spec, rev. 11: http://www.intel.com/software/avx
This covers all AVX2 instructions except VGATHER*/VPGATHER*, which
require additional ModRM handling.
Portions contributed by: Mark Charney <mark.charney@intel.com>
Part of [#227].
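
The recurring pattern in the gen_x86_insn.py hunks below: an instruction name (add_insn) points at a named group of encoding forms (add_group), and AVX2 support is added by appending 256-bit forms gated on cpu=["AVX2"] after the existing 128-bit SSE/AVX forms, e.g. the new xmm_xmm128_256avx2 group. A toy model of that lookup, ours for illustration only (the real generator emits C tables for yasm's assembler core):

groups = {}   # group name -> ordered list of encoding forms
insns = {}    # mnemonic -> (group name, per-instruction info)

def add_group(name, **form):
    # Each call appends one form; earlier (128-bit) forms keep priority.
    groups.setdefault(name, []).append(form)

def add_insn(mnemonic, group, **info):
    insns[mnemonic] = (group, info)

# Mirrors the shape of the new xmm_xmm128_256avx2 group in this commit:
# the 128-bit form needs only AVX, the 256-bit form needs AVX2.
add_group("xmm_xmm128_256avx2", cpu=["AVX"], size=128)
add_group("xmm_xmm128_256avx2", cpu=["AVX2"], size=256)
add_insn("vpaddb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFC])

def match(mnemonic, operand_size, enabled_cpus):
    # First form whose operand size fits and whose CPU gate is enabled wins,
    # so ymm operands only assemble once AVX2 is in the enabled set.
    group, _ = insns[mnemonic]
    for form in groups[group]:
        if form["size"] == operand_size and set(form["cpu"]) <= enabled_cpus:
            return form
    raise ValueError("no encoding for %s with %d-bit operands"
                     % (mnemonic, operand_size))

assert match("vpaddb", 128, {"AVX"})["size"] == 128
assert match("vpaddb", 256, {"AVX", "AVX2"})["cpu"] == ["AVX2"]

This is why most of the AVX2 work below lands inside existing SSE/AVX groups rather than in a new section: for many mnemonics only the group membership changes.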
Diffstat (limited to 'modules')
-rwxr-xr-x  modules/arch/x86/gen_x86_insn.py     |  679
-rw-r--r--  modules/arch/x86/tests/Makefile.inc  |    2
-rw-r--r--  modules/arch/x86/tests/avx2.asm      |  612
-rw-r--r--  modules/arch/x86/tests/avx2.hex      | 2105
4 files changed, 3280 insertions, 118 deletions
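
The two new test files are paired: each instruction in avx2.asm carries its expected encoding in a trailing "; c4 e2 ..." comment, and avx2.hex lists the same bytes, one hex byte per line (hence its 2105 lines). A small sketch, not yasm's actual test harness, that rebuilds the hex stream from those comments:

import re

def expected_bytes(asm_text):
    out = []
    for line in asm_text.splitlines():
        code, sep, comment = line.partition(";")
        if sep and code.strip():  # only instruction lines with a comment
            out.extend(re.findall(r"\b[0-9a-f]{2}\b", comment))
    return out

sample = "vpaddb ymm1, ymm3               ; c5 f5 fc cb"
assert expected_bytes(sample) == ["c5", "f5", "fc", "cb"]
# "\n".join(expected_bytes(open("avx2.asm").read())) then mirrors avx2.hex.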
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py index ded19ea5..5dec9e81 100755 --- a/modules/arch/x86/gen_x86_insn.py +++ b/modules/arch/x86/gen_x86_insn.py @@ -3997,44 +3997,44 @@ add_insn("punpckldq", "mmxsse2", modifiers=[0x62]) add_insn("pxor", "mmxsse2", modifiers=[0xEF]) # AVX versions don't support the MMX registers -add_insn("vpackssdw", "xmm_xmm128", modifiers=[0x66, 0x6B, VEXL0], avx=True) -add_insn("vpacksswb", "xmm_xmm128", modifiers=[0x66, 0x63, VEXL0], avx=True) -add_insn("vpackuswb", "xmm_xmm128", modifiers=[0x66, 0x67, VEXL0], avx=True) -add_insn("vpaddb", "xmm_xmm128", modifiers=[0x66, 0xFC, VEXL0], avx=True) -add_insn("vpaddw", "xmm_xmm128", modifiers=[0x66, 0xFD, VEXL0], avx=True) -add_insn("vpaddd", "xmm_xmm128", modifiers=[0x66, 0xFE, VEXL0], avx=True) -add_insn("vpaddq", "xmm_xmm128", modifiers=[0x66, 0xD4, VEXL0], avx=True) -add_insn("vpaddsb", "xmm_xmm128", modifiers=[0x66, 0xEC, VEXL0], avx=True) -add_insn("vpaddsw", "xmm_xmm128", modifiers=[0x66, 0xED, VEXL0], avx=True) -add_insn("vpaddusb", "xmm_xmm128", modifiers=[0x66, 0xDC, VEXL0], avx=True) -add_insn("vpaddusw", "xmm_xmm128", modifiers=[0x66, 0xDD, VEXL0], avx=True) -add_insn("vpand", "xmm_xmm128", modifiers=[0x66, 0xDB, VEXL0], avx=True) -add_insn("vpandn", "xmm_xmm128", modifiers=[0x66, 0xDF, VEXL0], avx=True) -add_insn("vpcmpeqb", "xmm_xmm128", modifiers=[0x66, 0x74, VEXL0], avx=True) -add_insn("vpcmpeqw", "xmm_xmm128", modifiers=[0x66, 0x75, VEXL0], avx=True) -add_insn("vpcmpeqd", "xmm_xmm128", modifiers=[0x66, 0x76, VEXL0], avx=True) -add_insn("vpcmpgtb", "xmm_xmm128", modifiers=[0x66, 0x64, VEXL0], avx=True) -add_insn("vpcmpgtw", "xmm_xmm128", modifiers=[0x66, 0x65, VEXL0], avx=True) -add_insn("vpcmpgtd", "xmm_xmm128", modifiers=[0x66, 0x66, VEXL0], avx=True) -add_insn("vpmaddwd", "xmm_xmm128", modifiers=[0x66, 0xF5, VEXL0], avx=True) -add_insn("vpmulhw", "xmm_xmm128", modifiers=[0x66, 0xE5, VEXL0], avx=True) -add_insn("vpmullw", "xmm_xmm128", modifiers=[0x66, 0xD5, VEXL0], avx=True) -add_insn("vpor", "xmm_xmm128", modifiers=[0x66, 0xEB, VEXL0], avx=True) -add_insn("vpsubb", "xmm_xmm128", modifiers=[0x66, 0xF8, VEXL0], avx=True) -add_insn("vpsubw", "xmm_xmm128", modifiers=[0x66, 0xF9, VEXL0], avx=True) -add_insn("vpsubd", "xmm_xmm128", modifiers=[0x66, 0xFA, VEXL0], avx=True) -add_insn("vpsubq", "xmm_xmm128", modifiers=[0x66, 0xFB, VEXL0], avx=True) -add_insn("vpsubsb", "xmm_xmm128", modifiers=[0x66, 0xE8, VEXL0], avx=True) -add_insn("vpsubsw", "xmm_xmm128", modifiers=[0x66, 0xE9, VEXL0], avx=True) -add_insn("vpsubusb", "xmm_xmm128", modifiers=[0x66, 0xD8, VEXL0], avx=True) -add_insn("vpsubusw", "xmm_xmm128", modifiers=[0x66, 0xD9, VEXL0], avx=True) -add_insn("vpunpckhbw", "xmm_xmm128", modifiers=[0x66, 0x68, VEXL0], avx=True) -add_insn("vpunpckhwd", "xmm_xmm128", modifiers=[0x66, 0x69, VEXL0], avx=True) -add_insn("vpunpckhdq", "xmm_xmm128", modifiers=[0x66, 0x6A, VEXL0], avx=True) -add_insn("vpunpcklbw", "xmm_xmm128", modifiers=[0x66, 0x60, VEXL0], avx=True) -add_insn("vpunpcklwd", "xmm_xmm128", modifiers=[0x66, 0x61, VEXL0], avx=True) -add_insn("vpunpckldq", "xmm_xmm128", modifiers=[0x66, 0x62, VEXL0], avx=True) -add_insn("vpxor", "xmm_xmm128", modifiers=[0x66, 0xEF, VEXL0], avx=True) +add_insn("vpackssdw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6B, VEXL0], avx=True) +add_insn("vpacksswb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x63, VEXL0], avx=True) +add_insn("vpackuswb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x67, VEXL0], avx=True) +add_insn("vpaddb", 
"xmm_xmm128_256avx2", modifiers=[0x66, 0xFC, VEXL0], avx=True) +add_insn("vpaddw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFD, VEXL0], avx=True) +add_insn("vpaddd", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFE, VEXL0], avx=True) +add_insn("vpaddq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD4, VEXL0], avx=True) +add_insn("vpaddsb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEC, VEXL0], avx=True) +add_insn("vpaddsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xED, VEXL0], avx=True) +add_insn("vpaddusb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDC, VEXL0], avx=True) +add_insn("vpaddusw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDD, VEXL0], avx=True) +add_insn("vpand", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDB, VEXL0], avx=True) +add_insn("vpandn", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDF, VEXL0], avx=True) +add_insn("vpcmpeqb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x74, VEXL0], avx=True) +add_insn("vpcmpeqw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x75, VEXL0], avx=True) +add_insn("vpcmpeqd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x76, VEXL0], avx=True) +add_insn("vpcmpgtb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x64, VEXL0], avx=True) +add_insn("vpcmpgtw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x65, VEXL0], avx=True) +add_insn("vpcmpgtd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x66, VEXL0], avx=True) +add_insn("vpmaddwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF5, VEXL0], avx=True) +add_insn("vpmulhw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE5, VEXL0], avx=True) +add_insn("vpmullw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD5, VEXL0], avx=True) +add_insn("vpor", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEB, VEXL0], avx=True) +add_insn("vpsubb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF8, VEXL0], avx=True) +add_insn("vpsubw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF9, VEXL0], avx=True) +add_insn("vpsubd", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFA, VEXL0], avx=True) +add_insn("vpsubq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFB, VEXL0], avx=True) +add_insn("vpsubsb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE8, VEXL0], avx=True) +add_insn("vpsubsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE9, VEXL0], avx=True) +add_insn("vpsubusb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD8, VEXL0], avx=True) +add_insn("vpsubusw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD9, VEXL0], avx=True) +add_insn("vpunpckhbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x68, VEXL0], avx=True) +add_insn("vpunpckhwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x69, VEXL0], avx=True) +add_insn("vpunpckhdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6A, VEXL0], avx=True) +add_insn("vpunpcklbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x60, VEXL0], avx=True) +add_insn("vpunpcklwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x61, VEXL0], avx=True) +add_insn("vpunpckldq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x62, VEXL0], avx=True) +add_insn("vpxor", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEF, VEXL0], avx=True) add_group("pshift", cpu=["MMX"], @@ -4075,42 +4075,43 @@ add_insn("psrld", "pshift", modifiers=[0xD2, 0x72, 2]) add_insn("psrlq", "pshift", modifiers=[0xD3, 0x73, 2]) # Ran out of modifiers, so AVX has to be separate -add_group("vpshift", - cpu=["AVX"], - modifiers=["Op1Add"], - vex=128, - prefix=0x66, - opcode=[0x0F, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"), - Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) -add_group("vpshift", - cpu=["AVX"], - modifiers=["Gap", "Op1Add", "SpAdd"], - vex=128, - prefix=0x66, - opcode=[0x0F, 0x00], - spare=0, - 
operands=[Operand(type="SIMDReg", size=128, dest="EAVEX"), - Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) -add_group("vpshift", - cpu=["AVX"], - modifiers=["Op1Add"], - vex=128, - prefix=0x66, - opcode=[0x0F, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDReg", size=128, dest="VEX"), - Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) -add_group("vpshift", - cpu=["AVX"], - modifiers=["Gap", "Op1Add", "SpAdd"], - vex=128, - prefix=0x66, - opcode=[0x0F, 0x00], - spare=0, - operands=[Operand(type="SIMDReg", size=128, dest="VEX"), - Operand(type="SIMDReg", size=128, dest="EA"), - Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +for cpu, sz in zip(["AVX", "AVX2"], [128, 256]): + add_group("vpshift", + cpu=[cpu], + modifiers=["Op1Add"], + vex=sz, + prefix=0x66, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="SpareVEX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) + add_group("vpshift", + cpu=[cpu], + modifiers=["Gap", "Op1Add", "SpAdd"], + vex=sz, + prefix=0x66, + opcode=[0x0F, 0x00], + spare=0, + operands=[Operand(type="SIMDReg", size=sz, dest="EAVEX"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + add_group("vpshift", + cpu=[cpu], + modifiers=["Op1Add"], + vex=sz, + prefix=0x66, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) + add_group("vpshift", + cpu=[cpu], + modifiers=["Gap", "Op1Add", "SpAdd"], + vex=sz, + prefix=0x66, + opcode=[0x0F, 0x00], + spare=0, + operands=[Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDReg", size=sz, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) add_insn("vpsllw", "vpshift", modifiers=[0xF1, 0x71, 6]) add_insn("vpslld", "vpshift", modifiers=[0xF2, 0x72, 6]) @@ -4134,14 +4135,14 @@ add_insn("pmulhuw", "mmxsse2", modifiers=[0xE4], cpu=["P3", "MMX"]) add_insn("psadbw", "mmxsse2", modifiers=[0xF6], cpu=["P3", "MMX"]) # AVX versions don't support MMX register -add_insn("vpavgb", "xmm_xmm128", modifiers=[0x66, 0xE0, VEXL0], avx=True) -add_insn("vpavgw", "xmm_xmm128", modifiers=[0x66, 0xE3, VEXL0], avx=True) -add_insn("vpmaxsw", "xmm_xmm128", modifiers=[0x66, 0xEE, VEXL0], avx=True) -add_insn("vpmaxub", "xmm_xmm128", modifiers=[0x66, 0xDE, VEXL0], avx=True) -add_insn("vpminsw", "xmm_xmm128", modifiers=[0x66, 0xEA, VEXL0], avx=True) -add_insn("vpminub", "xmm_xmm128", modifiers=[0x66, 0xDA, VEXL0], avx=True) -add_insn("vpmulhuw", "xmm_xmm128", modifiers=[0x66, 0xE4, VEXL0], avx=True) -add_insn("vpsadbw", "xmm_xmm128", modifiers=[0x66, 0xF6, VEXL0], avx=True) +add_insn("vpavgb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE0, VEXL0], avx=True) +add_insn("vpavgw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE3, VEXL0], avx=True) +add_insn("vpmaxsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEE, VEXL0], avx=True) +add_insn("vpmaxub", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDE, VEXL0], avx=True) +add_insn("vpminsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEA, VEXL0], avx=True) +add_insn("vpminub", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDA, VEXL0], avx=True) +add_insn("vpmulhuw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE4, VEXL0], avx=True) +add_insn("vpsadbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF6, VEXL0], avx=True) add_insn("prefetchnta", "twobytemem", modifiers=[0, 0x0F, 0x18], cpu=["P3"]) add_insn("prefetcht0", "twobytemem", modifiers=[1, 0x0F, 0x18], 
cpu=["P3"]) @@ -4172,6 +4173,49 @@ add_group("xmm_xmm128_256", vex=256, prefix=0x00, opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) +add_group("xmm_xmm128_256", + cpu=["AVX"], + modifiers=["PreAdd", "Op1Add"], + vex=256, + prefix=0x00, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) + +# Same as above, except 256-bit version only available in AVX2 +add_group("xmm_xmm128_256avx2", + cpu=["SSE"], + modifiers=["PreAdd", "Op1Add", "SetVEX"], + prefix=0x00, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +add_group("xmm_xmm128_256avx2", + cpu=["AVX"], + modifiers=["PreAdd", "Op1Add"], + vex=128, + prefix=0x00, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="VEX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +add_group("xmm_xmm128_256avx2", + cpu=["AVX2"], + modifiers=["PreAdd", "Op1Add"], + vex=256, + prefix=0x00, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) +add_group("xmm_xmm128_256avx2", + cpu=["AVX2"], + modifiers=["PreAdd", "Op1Add"], + vex=256, + prefix=0x00, + opcode=[0x0F, 0x00], operands=[Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="SIMDReg", size=256, dest="VEX"), Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) @@ -4526,6 +4570,23 @@ add_group("xmm_xmm128_imm", add_insn("cmpps", "xmm_xmm128_imm", modifiers=[0, 0xC2]) add_insn("shufps", "xmm_xmm128_imm", modifiers=[0, 0xC6]) +# YMM register AVX2 version of above +add_group("xmm_xmm128_imm_256avx2", + cpu=["SSE"], + modifiers=["PreAdd", "Op1Add", "SetVEX"], + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("xmm_xmm128_imm_256avx2", + cpu=["AVX2"], + modifiers=["PreAdd", "Op1Add"], + vex=256, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + # YMM register and 4-operand version of above add_group("xmm_xmm128_imm_256", cpu=["SSE"], @@ -4991,6 +5052,14 @@ add_group("pmovmskb", operands=[Operand(type="Reg", size=32, dest="Spare"), Operand(type="SIMDReg", size=128, dest="EA")]) add_group("pmovmskb", + suffix="l", + cpu=["AVX2"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0xD7], + operands=[Operand(type="Reg", size=32, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="EA")]) +add_group("pmovmskb", suffix="q", cpu=["MMX", "P3"], notavx=True, @@ -5009,6 +5078,16 @@ add_group("pmovmskb", opcode=[0x0F, 0xD7], operands=[Operand(type="Reg", size=64, dest="Spare"), Operand(type="SIMDReg", size=128, dest="EA")]) +add_group("pmovmskb", + suffix="q", + cpu=["SSE2"], + vex=256, + opersize=64, + def_opersize_64=64, + prefix=0x66, + opcode=[0x0F, 0xD7], + operands=[Operand(type="Reg", size=64, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="EA")]) add_insn("pmovmskb", "pmovmskb") add_insn("vpmovmskb", "pmovmskb", modifiers=[VEXL0], avx=True) @@ -5419,12 
+5498,12 @@ add_insn("punpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C], cpu=["SSE2"]) add_insn("vcvttsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2C, VEXL0], avx=True) # vcvttpd2dq takes xmm, ymm combination # vcvttps2dq is two-operand -add_insn("vpmuludq", "xmm_xmm128", modifiers=[0x66, 0xF4, VEXL0], avx=True) -add_insn("vpshufd", "xmm_xmm128_imm", modifiers=[0x66, 0x70, VEXL0], avx=True) -add_insn("vpshufhw", "xmm_xmm128_imm", modifiers=[0xF3, 0x70, VEXL0], avx=True) -add_insn("vpshuflw", "xmm_xmm128_imm", modifiers=[0xF2, 0x70, VEXL0], avx=True) -add_insn("vpunpckhqdq", "xmm_xmm128", modifiers=[0x66, 0x6D, VEXL0], avx=True) -add_insn("vpunpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C, VEXL0], avx=True) +add_insn("vpmuludq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF4, VEXL0], avx=True) +add_insn("vpshufd", "xmm_xmm128_imm_256avx2", modifiers=[0x66, 0x70, VEXL0], avx=True) +add_insn("vpshufhw", "xmm_xmm128_imm_256avx2", modifiers=[0xF3, 0x70, VEXL0], avx=True) +add_insn("vpshuflw", "xmm_xmm128_imm_256avx2", modifiers=[0xF2, 0x70, VEXL0], avx=True) +add_insn("vpunpckhqdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6D, VEXL0], avx=True) +add_insn("vpunpcklqdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6C, VEXL0], avx=True) add_insn("cvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A], cpu=["SSE2"]) add_insn("vcvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A, VEXL0], avx=True) @@ -5480,6 +5559,25 @@ add_group("pslrldq", operands=[Operand(type="SIMDReg", size=128, dest="VEX"), Operand(type="SIMDReg", size=128, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("pslrldq", + cpu=["AVX2"], + modifiers=["SpAdd"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x73], + spare=0, + operands=[Operand(type="SIMDReg", size=256, dest="EAVEX"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("pslrldq", + cpu=["AVX2"], + modifiers=["SpAdd"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x73], + spare=0, + operands=[Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDReg", size=256, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) add_insn("pslldq", "pslrldq", modifiers=[7]) add_insn("psrldq", "pslrldq", modifiers=[3]) @@ -5556,6 +5654,23 @@ add_group("ssse3", operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDReg", size=128, dest="VEX"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +add_group("ssse3", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) +add_group("ssse3", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) add_insn("pshufb", "ssse3", modifiers=[0x00]) add_insn("phaddw", "ssse3", modifiers=[0x01]) @@ -5604,7 +5719,7 @@ add_group("ssse3imm", Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) add_insn("palignr", "ssse3imm", modifiers=[0x0F]) -add_insn("vpalignr", "sse4imm", modifiers=[0x0F, VEXL0], avx=True) +add_insn("vpalignr", "sse4imm_256avx2", modifiers=[0x0F, VEXL0], avx=True) ##################################################################### # SSE4.1 / SSE4.2 instructions @@ -5684,6 +5799,54 @@ add_group("sse4imm_256", vex=256, prefix=0x66, opcode=[0x0F, 0x3A, 0x00], + 
operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("sse4imm_256", + cpu=["AVX"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +# Same as above except AVX2 required for 256-bit. +add_group("sse4imm_256avx2", + cpu=["SSE41"], + modifiers=["Op2Add", "SetVEX"], + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("sse4imm_256avx2", + cpu=["AVX"], + modifiers=["Op2Add"], + vex=128, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="VEX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("sse4imm_256avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +add_group("sse4imm_256avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], operands=[Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="SIMDReg", size=256, dest="VEX"), Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), @@ -5758,13 +5921,14 @@ add_insn("roundps", "sse4imm", modifiers=[0x08]) add_insn("roundsd", "sse4m64imm", modifiers=[0x0B]) add_insn("roundss", "sse4m32imm", modifiers=[0x0A]) -# vdppd, vmpsadbw, and vpblendw do not allow YMM registers +# vdppd does not allow YMM registers +# vmpsadbw and vpblendw do not allow YMM registers unless AVX2 add_insn("vblendpd", "sse4imm_256", modifiers=[0x0D, VEXL0], avx=True) add_insn("vblendps", "sse4imm_256", modifiers=[0x0C, VEXL0], avx=True) add_insn("vdppd", "sse4imm", modifiers=[0x41, VEXL0], avx=True) add_insn("vdpps", "sse4imm_256", modifiers=[0x40, VEXL0], avx=True) -add_insn("vmpsadbw", "sse4imm", modifiers=[0x42, VEXL0], avx=True) -add_insn("vpblendw", "sse4imm", modifiers=[0x0E, VEXL0], avx=True) +add_insn("vmpsadbw", "sse4imm_256avx2", modifiers=[0x42, VEXL0], avx=True) +add_insn("vpblendw", "sse4imm_256avx2", modifiers=[0x0E, VEXL0], avx=True) # vroundpd and vroundps don't add another register operand add_insn("vroundsd", "sse4m64imm", modifiers=[0x0B, VEXL0], avx=True) add_insn("vroundss", "sse4m32imm", modifiers=[0x0A, VEXL0], avx=True) @@ -5814,9 +5978,9 @@ add_group("avx_sse4xmm0", add_insn("vblendvpd", "avx_sse4xmm0", modifiers=[0x4B]) add_insn("vblendvps", "avx_sse4xmm0", modifiers=[0x4A]) -# vpblendvb doesn't have a 256-bit form -add_group("avx_sse4xmm0_128", - cpu=["AVX"], +# vpblendvb didn't have a 256-bit form until AVX2 +add_group("avx2_sse4xmm0", + cpu=["AVX2"], modifiers=["Op2Add"], vex=128, prefix=0x66, @@ -5825,8 +5989,18 @@ add_group("avx_sse4xmm0_128", Operand(type="SIMDReg", size=128, dest="VEX"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), Operand(type="SIMDReg", size=128, dest="VEXImmSrc")]) 
+add_group("avx2_sse4xmm0", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=256, dest="VEXImmSrc")]) -add_insn("vpblendvb", "avx_sse4xmm0_128", modifiers=[0x4C]) +add_insn("vpblendvb", "avx2_sse4xmm0", modifiers=[0x4C]) for sfx, sz in zip("bwl", [8, 16, 32]): add_group("crc32", @@ -5915,6 +6089,13 @@ add_group("movntdqa", opcode=[0x0F, 0x38, 0x2A], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="Mem", size=128, relaxed=True, dest="EA")]) +add_group("movntdqa", + cpu=["AVX2"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x2A], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="Mem", size=256, relaxed=True, dest="EA")]) add_insn("movntdqa", "movntdqa") add_insn("vmovntdqa", "movntdqa", modifiers=[VEXL0], avx=True) @@ -6089,6 +6270,22 @@ for sz in [16, 32, 64]: opcode=[0x0F, 0x38, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDReg", size=128, dest="EA")]) + add_group("sse4m%d" % sz, + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="Mem", size=sz*2, relaxed=True, dest="EA")]) + add_group("sse4m%d" % sz, + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) add_insn("pmovsxbw", "sse4m64", modifiers=[0x20]) add_insn("pmovsxwd", "sse4m64", modifiers=[0x23]) @@ -6365,12 +6562,22 @@ add_group("avx_ssse3_2op", opcode=[0x0F, 0x38, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) - -add_insn("vpabsb", "avx_ssse3_2op", modifiers=[0x1C], avx=True) -add_insn("vpabsw", "avx_ssse3_2op", modifiers=[0x1D], avx=True) -add_insn("vpabsd", "avx_ssse3_2op", modifiers=[0x1E], avx=True) add_insn("vphminposuw", "avx_ssse3_2op", modifiers=[0x41], avx=True) +# VPABS* are extended to 256-bit in AVX2 +for cpu, sz in zip(["AVX", "AVX2"], [128, 256]): + add_group("avx2_ssse3_2op", + cpu=[cpu], + modifiers=["Op2Add"], + vex=sz, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")]) +add_insn("vpabsb", "avx2_ssse3_2op", modifiers=[0x1C], avx=True) +add_insn("vpabsw", "avx2_ssse3_2op", modifiers=[0x1D], avx=True) +add_insn("vpabsd", "avx2_ssse3_2op", modifiers=[0x1E], avx=True) + # Some conversion functions take xmm, ymm combination # Need separate x and y versions for gas mode add_group("avx_cvt_xmm128_x", @@ -6437,6 +6644,20 @@ add_group("vbroadcastss", opcode=[0x0F, 0x38, 0x18], operands=[Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="Mem", size=32, relaxed=True, dest="EA")]) +add_group("vbroadcastss", + cpu=["AVX2"], + vex=128, + prefix=0x66, + opcode=[0x0F, 0x38, 0x18], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) +add_group("vbroadcastss", + cpu=["AVX2"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x18], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) add_insn("vbroadcastss", 
"vbroadcastss") @@ -6447,41 +6668,51 @@ add_group("vbroadcastsd", opcode=[0x0F, 0x38, 0x19], operands=[Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="Mem", size=64, relaxed=True, dest="EA")]) +add_group("vbroadcastsd", + cpu=["AVX2"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x38, 0x19], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) add_insn("vbroadcastsd", "vbroadcastsd") -add_group("vbroadcastf128", - cpu=["AVX"], +add_group("vbroadcastif128", + modifiers=["Op2Add"], vex=256, prefix=0x66, - opcode=[0x0F, 0x38, 0x1A], + opcode=[0x0F, 0x38, 0x00], operands=[Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="Mem", size=128, relaxed=True, dest="EA")]) -add_insn("vbroadcastf128", "vbroadcastf128") +add_insn("vbroadcastf128", "vbroadcastif128", modifiers=[0x1A], cpu=["AVX"]) +add_insn("vbroadcasti128", "vbroadcastif128", modifiers=[0x5A], cpu=["AVX2"]) -add_group("vextractf128", - cpu=["AVX"], +add_group("vextractif128", + modifiers=["Op2Add"], vex=256, prefix=0x66, - opcode=[0x0F, 0x3A, 0x19], + opcode=[0x0F, 0x3A, 0x00], operands=[Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) -add_insn("vextractf128", "vextractf128") +add_insn("vextractf128", "vextractif128", modifiers=[0x19], cpu=["AVX"]) +add_insn("vextracti128", "vextractif128", modifiers=[0x39], cpu=["AVX2"]) -add_group("vinsertf128", - cpu=["AVX"], +add_group("vinsertif128", + modifiers=["Op2Add"], vex=256, prefix=0x66, - opcode=[0x0F, 0x3A, 0x18], + opcode=[0x0F, 0x3A, 0x00], operands=[Operand(type="SIMDReg", size=256, dest="Spare"), Operand(type="SIMDReg", size=256, dest="VEX"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) -add_insn("vinsertf128", "vinsertf128") +add_insn("vinsertf128", "vinsertif128", modifiers=[0x18], cpu=["AVX"]) +add_insn("vinserti128", "vinsertif128", modifiers=[0x38], cpu=["AVX2"]) add_group("vzero", cpu=["AVX"], @@ -6493,7 +6724,6 @@ add_insn("vzeroall", "vzero", modifiers=[VEXL1]) add_insn("vzeroupper", "vzero", modifiers=[VEXL0]) add_group("vmaskmov", - cpu=["AVX"], modifiers=["Op2Add"], vex=128, prefix=0x66, @@ -6502,7 +6732,6 @@ add_group("vmaskmov", Operand(type="SIMDReg", size=128, dest="VEX"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) add_group("vmaskmov", - cpu=["AVX"], modifiers=["Op2Add"], vex=256, prefix=0x66, @@ -6511,7 +6740,6 @@ add_group("vmaskmov", Operand(type="SIMDReg", size=256, dest="VEX"), Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) add_group("vmaskmov", - cpu=["AVX"], modifiers=["Op2Add"], vex=128, prefix=0x66, @@ -6520,7 +6748,6 @@ add_group("vmaskmov", Operand(type="SIMDReg", size=128, dest="VEX"), Operand(type="SIMDReg", size=128, dest="Spare")]) add_group("vmaskmov", - cpu=["AVX"], modifiers=["Op2Add"], vex=256, prefix=0x66, @@ -6529,8 +6756,8 @@ add_group("vmaskmov", Operand(type="SIMDReg", size=256, dest="VEX"), Operand(type="SIMDReg", size=256, dest="Spare")]) -add_insn("vmaskmovps", "vmaskmov", modifiers=[0x2C]) -add_insn("vmaskmovpd", "vmaskmov", modifiers=[0x2D]) +add_insn("vmaskmovps", "vmaskmov", modifiers=[0x2C], cpu=["AVX"]) +add_insn("vmaskmovpd", "vmaskmov", modifiers=[0x2D], cpu=["AVX"]) add_group("vpermil", cpu=["AVX"], @@ -6585,6 +6812,222 @@ add_group("vperm2f128", add_insn("vperm2f128", "vperm2f128") 
##################################################################### +# Intel AVX2 instructions +##################################################################### + +# Most AVX2 instructions are mixed in with above SSEx/AVX groups. +# Some make more sense to have separate groups. + +# vex.vvvv=1111b +add_group("vperm_var_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")]) + +add_insn("vpermd", "vperm_var_avx2", modifiers=[0x36]) +add_insn("vpermps", "vperm_var_avx2", modifiers=[0x16]) + +# vex.vvvv=1111b +add_group("vperm_imm_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +add_insn("vpermq", "vperm_imm_avx2", modifiers=[0x00]) +add_insn("vpermpd", "vperm_imm_avx2", modifiers=[0x01]) + +add_group("vperm2i128_avx2", + cpu=["AVX2"], + vex=256, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x46], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="SIMDReg", size=256, dest="VEX"), + Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +add_insn("vperm2i128", "vperm2i128_avx2") + +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastb_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x78], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")]) +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastb_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x78], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="RM", size=8, relaxed=True, dest="EA")]) + +add_insn("vpbroadcastb", "vpbroadcastb_avx2") + +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastw_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x79], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")]) +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastw_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x79], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="RM", size=16, relaxed=True, dest="EA")]) + +add_insn("vpbroadcastw", "vpbroadcastw_avx2") + +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastd_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x58], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")]) +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastd_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x58], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="RM", size=32, relaxed=True, dest="EA")]) + +add_insn("vpbroadcastd", "vpbroadcastd_avx2") + +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastq_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x59], + operands=[Operand(type="SIMDReg", size=sz, 
dest="Spare"), + Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")]) +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vpbroadcastq_avx2", + cpu=["AVX2"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x59], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="RM", size=64, relaxed=True, dest="EA")]) + +add_insn("vpbroadcastq", "vpbroadcastq_avx2") + +for sz in [128, 256]: + add_group("vpshiftv_vexw0_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")]) + +for sz in [128, 256]: + add_group("vpshiftv_vexw1_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=sz, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")]) + +add_insn("vpsrlvd", "vpshiftv_vexw0_avx2", modifiers=[0x45]) +add_insn("vpsrlvq", "vpshiftv_vexw1_avx2", modifiers=[0x45]) +add_insn("vpsravd", "vpshiftv_vexw0_avx2", modifiers=[0x46]) + +add_insn("vpsllvd", "vpshiftv_vexw0_avx2", modifiers=[0x47]) +add_insn("vpsllvq", "vpshiftv_vexw1_avx2", modifiers=[0x47]) + +add_insn("vpmaskmovd", "vmaskmov", modifiers=[0x8C], cpu=["AVX2"]) + +# vex.vvvv=1111b +for sz in [128, 256]: + add_group("vmaskmov_vexw1_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=sz, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")]) + +for sz in [128, 256]: + add_group("vmaskmov_vexw1_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=sz, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x02], + operands=[Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDReg", size=sz, dest="Spare")]) + +add_insn("vpmaskmovq", "vmaskmov_vexw1_avx2", modifiers=[0x8C]) + +for sz in [128, 256]: + add_group("vex_66_0F3A_imm8_avx2", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=sz, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=sz, dest="Spare"), + Operand(type="SIMDReg", size=sz, dest="VEX"), + Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +add_insn("vpblendd", "vex_66_0F3A_imm8_avx2", modifiers=[0x02]) + +##################################################################### # Intel FMA instructions ##################################################################### diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc index fab543b3..7aebd992 100644 --- a/modules/arch/x86/tests/Makefile.inc +++ b/modules/arch/x86/tests/Makefile.inc @@ -25,6 +25,8 @@ EXTRA_DIST += modules/arch/x86/tests/avx.asm EXTRA_DIST += modules/arch/x86/tests/avx.hex EXTRA_DIST += modules/arch/x86/tests/avx16.asm EXTRA_DIST += modules/arch/x86/tests/avx16.hex +EXTRA_DIST += modules/arch/x86/tests/avx2.asm +EXTRA_DIST += modules/arch/x86/tests/avx2.hex EXTRA_DIST += modules/arch/x86/tests/avxcc.asm EXTRA_DIST += modules/arch/x86/tests/avxcc.hex EXTRA_DIST += modules/arch/x86/tests/bittest.asm diff --git a/modules/arch/x86/tests/avx2.asm b/modules/arch/x86/tests/avx2.asm new 
file mode 100644 index 00000000..acf17ade --- /dev/null +++ b/modules/arch/x86/tests/avx2.asm @@ -0,0 +1,612 @@ +; Exhaustive test of AVX2 instructions +; +; Copyright (C) 2011 Peter Johnson +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; 1. Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; 2. Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; +; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' +; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE +; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +; POSSIBILITY OF SUCH DAMAGE. +; + +[bits 64] + +vmpsadbw ymm1, ymm3, 3 ; c4 e3 75 42 cb 03 +vmpsadbw ymm1, yword [rax], 3 ; c4 e3 75 42 08 03 +vmpsadbw ymm1, ymm2, ymm3, 3 ; c4 e3 6d 42 cb 03 +vmpsadbw ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 42 08 03 + +vpabsb ymm1, ymm2 ; c4 e2 7d 1c ca +vpabsb ymm1, yword [rax] ; c4 e2 7d 1c 08 + +vpabsw ymm1, ymm2 ; c4 e2 7d 1d ca +vpabsw ymm1, yword [rax] ; c4 e2 7d 1d 08 + +vpabsd ymm1, ymm2 ; c4 e2 7d 1e ca +vpabsd ymm1, yword [rax] ; c4 e2 7d 1e 08 + +vpacksswb ymm1, ymm3 ; c5 f5 63 cb +vpacksswb ymm1, yword [rax] ; c5 f5 63 08 +vpacksswb ymm1, ymm2, ymm3 ; c5 ed 63 cb +vpacksswb ymm1, ymm2, yword [rax] ; c5 ed 63 08 + +vpackssdw ymm1, ymm3 ; c5 f5 6b cb +vpackssdw ymm1, yword [rax] ; c5 f5 6b 08 +vpackssdw ymm1, ymm2, ymm3 ; c5 ed 6b cb +vpackssdw ymm1, ymm2, yword [rax] ; c5 ed 6b 08 + +vpackusdw ymm1, ymm3 ; c4 e2 75 2b cb +vpackusdw ymm1, yword [rax] ; c4 e2 75 2b 08 +vpackusdw ymm1, ymm2, ymm3 ; c4 e2 6d 2b cb +vpackusdw ymm1, ymm2, yword [rax] ; c4 e2 6d 2b 08 + +vpackuswb ymm1, ymm3 ; c5 f5 67 cb +vpackuswb ymm1, yword [rax] ; c5 f5 67 08 +vpackuswb ymm1, ymm2, ymm3 ; c5 ed 67 cb +vpackuswb ymm1, ymm2, yword [rax] ; c5 ed 67 08 + +vpaddb ymm1, ymm3 ; c5 f5 fc cb +vpaddb ymm1, yword [rax] ; c5 f5 fc 08 +vpaddb ymm1, ymm2, ymm3 ; c5 ed fc cb +vpaddb ymm1, ymm2, yword [rax] ; c5 ed fc 08 + +vpaddw ymm1, ymm3 ; c5 f5 fd cb +vpaddw ymm1, yword [rax] ; c5 f5 fd 08 +vpaddw ymm1, ymm2, ymm3 ; c5 ed fd cb +vpaddw ymm1, ymm2, yword [rax] ; c5 ed fd 08 + +vpaddd ymm1, ymm3 ; c5 f5 fe cb +vpaddd ymm1, yword [rax] ; c5 f5 fe 08 +vpaddd ymm1, ymm2, ymm3 ; c5 ed fe cb +vpaddd ymm1, ymm2, yword [rax] ; c5 ed fe 08 + +vpaddq ymm1, ymm3 ; c5 f5 d4 cb +vpaddq ymm1, yword [rax] ; c5 f5 d4 08 +vpaddq ymm1, ymm2, ymm3 ; c5 ed d4 cb +vpaddq ymm1, ymm2, yword [rax] ; c5 ed d4 08 + +vpaddsb ymm1, ymm3 ; c5 f5 ec cb +vpaddsb ymm1, yword [rax] ; c5 f5 ec 08 +vpaddsb ymm1, ymm2, ymm3 ; c5 ed ec cb +vpaddsb ymm1, ymm2, yword [rax] ; c5 ed ec 08 + +vpaddsw ymm1, ymm3 ; c5 f5 ed cb +vpaddsw ymm1, yword [rax] ; c5 f5 ed 08 +vpaddsw ymm1, ymm2, ymm3 ; c5 
ed ed cb +vpaddsw ymm1, ymm2, yword [rax] ; c5 ed ed 08 + +vpaddusb ymm1, ymm3 ; c5 f5 dc cb +vpaddusb ymm1, yword [rax] ; c5 f5 dc 08 +vpaddusb ymm1, ymm2, ymm3 ; c5 ed dc cb +vpaddusb ymm1, ymm2, yword [rax] ; c5 ed dc 08 + +vpaddusw ymm1, ymm3 ; c5 f5 dd cb +vpaddusw ymm1, yword [rax] ; c5 f5 dd 08 +vpaddusw ymm1, ymm2, ymm3 ; c5 ed dd cb +vpaddusw ymm1, ymm2, yword [rax] ; c5 ed dd 08 + +vpalignr ymm1, ymm2, ymm3, 3 ; c4 e3 6d 0f cb 03 +vpalignr ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 0f 08 03 + +vpand ymm1, ymm3 ; c5 f5 db cb +vpand ymm1, yword [rax] ; c5 f5 db 08 +vpand ymm1, ymm2, ymm3 ; c5 ed db cb +vpand ymm1, ymm2, yword [rax] ; c5 ed db 08 + +vpandn ymm1, ymm3 ; c5 f5 df cb +vpandn ymm1, yword [rax] ; c5 f5 df 08 +vpandn ymm1, ymm2, ymm3 ; c5 ed df cb +vpandn ymm1, ymm2, yword [rax] ; c5 ed df 08 + +vpavgb ymm1, ymm3 ; c5 f5 e0 cb +vpavgb ymm1, yword [rax] ; c5 f5 e0 08 +vpavgb ymm1, ymm2, ymm3 ; c5 ed e0 cb +vpavgb ymm1, ymm2, yword [rax] ; c5 ed e0 08 + +vpavgw ymm1, ymm3 ; c5 f5 e3 cb +vpavgw ymm1, yword [rax] ; c5 f5 e3 08 +vpavgw ymm1, ymm2, ymm3 ; c5 ed e3 cb +vpavgw ymm1, ymm2, yword [rax] ; c5 ed e3 08 + +vpblendvb ymm1, ymm2, ymm3, ymm4 ; c4 e3 6d 4c cb 40 +vpblendvb ymm1, ymm2, yword [rax], ymm4 ; c4 e3 6d 4c 08 40 + +vpblendw ymm1, ymm3, 3 ; c4 e3 75 0e cb 03 +vpblendw ymm1, yword [rax], 3 ; c4 e3 75 0e 08 03 +vpblendw ymm1, ymm2, ymm3, 3 ; c4 e3 6d 0e cb 03 +vpblendw ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 0e 08 03 + +vpcmpeqb ymm1, ymm3 ; c5 f5 74 cb +vpcmpeqb ymm1, yword [rax] ; c5 f5 74 08 +vpcmpeqb ymm1, ymm2, ymm3 ; c5 ed 74 cb +vpcmpeqb ymm1, ymm2, yword [rax] ; c5 ed 74 08 + +vpcmpeqw ymm1, ymm3 ; c5 f5 75 cb +vpcmpeqw ymm1, yword [rax] ; c5 f5 75 08 +vpcmpeqw ymm1, ymm2, ymm3 ; c5 ed 75 cb +vpcmpeqw ymm1, ymm2, yword [rax] ; c5 ed 75 08 + +vpcmpeqd ymm1, ymm3 ; c5 f5 76 cb +vpcmpeqd ymm1, yword [rax] ; c5 f5 76 08 +vpcmpeqd ymm1, ymm2, ymm3 ; c5 ed 76 cb +vpcmpeqd ymm1, ymm2, yword [rax] ; c5 ed 76 08 + +vpcmpeqq ymm1, ymm3 ; c4 e2 75 29 cb +vpcmpeqq ymm1, yword [rax] ; c4 e2 75 29 08 +vpcmpeqq ymm1, ymm2, ymm3 ; c4 e2 6d 29 cb +vpcmpeqq ymm1, ymm2, yword [rax] ; c4 e2 6d 29 08 + +vpcmpgtb ymm1, ymm3 ; c5 f5 64 cb +vpcmpgtb ymm1, yword [rax] ; c5 f5 64 08 +vpcmpgtb ymm1, ymm2, ymm3 ; c5 ed 64 cb +vpcmpgtb ymm1, ymm2, yword [rax] ; c5 ed 64 08 + +vpcmpgtw ymm1, ymm3 ; c5 f5 65 cb +vpcmpgtw ymm1, yword [rax] ; c5 f5 65 08 +vpcmpgtw ymm1, ymm2, ymm3 ; c5 ed 65 cb +vpcmpgtw ymm1, ymm2, yword [rax] ; c5 ed 65 08 + +vpcmpgtd ymm1, ymm3 ; c5 f5 66 cb +vpcmpgtd ymm1, yword [rax] ; c5 f5 66 08 +vpcmpgtd ymm1, ymm2, ymm3 ; c5 ed 66 cb +vpcmpgtd ymm1, ymm2, yword [rax] ; c5 ed 66 08 + +vpcmpgtq ymm1, ymm3 ; c4 e2 75 37 cb +vpcmpgtq ymm1, yword [rax] ; c4 e2 75 37 08 +vpcmpgtq ymm1, ymm2, ymm3 ; c4 e2 6d 37 cb +vpcmpgtq ymm1, ymm2, yword [rax] ; c4 e2 6d 37 08 + +vphaddw ymm1, ymm3 ; c4 e2 75 01 cb +vphaddw ymm1, yword [rax] ; c4 e2 75 01 08 +vphaddw ymm1, ymm2, ymm3 ; c4 e2 6d 01 cb +vphaddw ymm1, ymm2, yword [rax] ; c4 e2 6d 01 08 + +vphaddd ymm1, ymm3 ; c4 e2 75 02 cb +vphaddd ymm1, yword [rax] ; c4 e2 75 02 08 +vphaddd ymm1, ymm2, ymm3 ; c4 e2 6d 02 cb +vphaddd ymm1, ymm2, yword [rax] ; c4 e2 6d 02 08 + +vphaddsw ymm1, ymm3 ; c4 e2 75 03 cb +vphaddsw ymm1, yword [rax] ; c4 e2 75 03 08 +vphaddsw ymm1, ymm2, ymm3 ; c4 e2 6d 03 cb +vphaddsw ymm1, ymm2, yword [rax] ; c4 e2 6d 03 08 + +vphsubw ymm1, ymm3 ; c4 e2 75 05 cb +vphsubw ymm1, yword [rax] ; c4 e2 75 05 08 +vphsubw ymm1, ymm2, ymm3 ; c4 e2 6d 05 cb +vphsubw ymm1, ymm2, yword [rax] ; c4 e2 6d 05 08 + +vphsubd ymm1, 
ymm3 ; c4 e2 75 06 cb +vphsubd ymm1, yword [rax] ; c4 e2 75 06 08 +vphsubd ymm1, ymm2, ymm3 ; c4 e2 6d 06 cb +vphsubd ymm1, ymm2, yword [rax] ; c4 e2 6d 06 08 + +vphsubsw ymm1, ymm3 ; c4 e2 75 07 cb +vphsubsw ymm1, yword [rax] ; c4 e2 75 07 08 +vphsubsw ymm1, ymm2, ymm3 ; c4 e2 6d 07 cb +vphsubsw ymm1, ymm2, yword [rax] ; c4 e2 6d 07 08 + +vpmaddubsw ymm1, ymm3 ; c4 e2 75 04 cb +vpmaddubsw ymm1, yword [rax] ; c4 e2 75 04 08 +vpmaddubsw ymm1, ymm2, ymm3 ; c4 e2 6d 04 cb +vpmaddubsw ymm1, ymm2, yword [rax] ; c4 e2 6d 04 08 + +vpmaddwd ymm1, ymm3 ; c5 f5 f5 cb +vpmaddwd ymm1, yword [rax] ; c5 f5 f5 08 +vpmaddwd ymm1, ymm2, ymm3 ; c5 ed f5 cb +vpmaddwd ymm1, ymm2, yword [rax] ; c5 ed f5 08 + +vpmaxsb ymm1, ymm3 ; c4 e2 75 3c cb +vpmaxsb ymm1, yword [rax] ; c4 e2 75 3c 08 +vpmaxsb ymm1, ymm2, ymm3 ; c4 e2 6d 3c cb +vpmaxsb ymm1, ymm2, yword [rax] ; c4 e2 6d 3c 08 + +vpmaxsw ymm1, ymm3 ; c5 f5 ee cb +vpmaxsw ymm1, yword [rax] ; c5 f5 ee 08 +vpmaxsw ymm1, ymm2, ymm3 ; c5 ed ee cb +vpmaxsw ymm1, ymm2, yword [rax] ; c5 ed ee 08 + +vpmaxsd ymm1, ymm3 ; c4 e2 75 3d cb +vpmaxsd ymm1, yword [rax] ; c4 e2 75 3d 08 +vpmaxsd ymm1, ymm2, ymm3 ; c4 e2 6d 3d cb +vpmaxsd ymm1, ymm2, yword [rax] ; c4 e2 6d 3d 08 + +vpmaxub ymm1, ymm3 ; c5 f5 de cb +vpmaxub ymm1, yword [rax] ; c5 f5 de 08 +vpmaxub ymm1, ymm2, ymm3 ; c5 ed de cb +vpmaxub ymm1, ymm2, yword [rax] ; c5 ed de 08 + +vpmaxuw ymm1, ymm3 ; c4 e2 75 3e cb +vpmaxuw ymm1, yword [rax] ; c4 e2 75 3e 08 +vpmaxuw ymm1, ymm2, ymm3 ; c4 e2 6d 3e cb +vpmaxuw ymm1, ymm2, yword [rax] ; c4 e2 6d 3e 08 + +vpmaxud ymm1, ymm3 ; c4 e2 75 3f cb +vpmaxud ymm1, yword [rax] ; c4 e2 75 3f 08 +vpmaxud ymm1, ymm2, ymm3 ; c4 e2 6d 3f cb +vpmaxud ymm1, ymm2, yword [rax] ; c4 e2 6d 3f 08 + +vpminsb ymm1, ymm3 ; c4 e2 75 38 cb +vpminsb ymm1, yword [rax] ; c4 e2 75 38 08 +vpminsb ymm1, ymm2, ymm3 ; c4 e2 6d 38 cb +vpminsb ymm1, ymm2, yword [rax] ; c4 e2 6d 38 08 + +vpminsw ymm1, ymm3 ; c5 f5 ea cb +vpminsw ymm1, yword [rax] ; c5 f5 ea 08 +vpminsw ymm1, ymm2, ymm3 ; c5 ed ea cb +vpminsw ymm1, ymm2, yword [rax] ; c5 ed ea 08 + +vpminsd ymm1, ymm3 ; c4 e2 75 39 cb +vpminsd ymm1, yword [rax] ; c4 e2 75 39 08 +vpminsd ymm1, ymm2, ymm3 ; c4 e2 6d 39 cb +vpminsd ymm1, ymm2, yword [rax] ; c4 e2 6d 39 08 + +vpminub ymm1, ymm3 ; c5 f5 da cb +vpminub ymm1, yword [rax] ; c5 f5 da 08 +vpminub ymm1, ymm2, ymm3 ; c5 ed da cb +vpminub ymm1, ymm2, yword [rax] ; c5 ed da 08 + +vpminuw ymm1, ymm3 ; c4 e2 75 3a cb +vpminuw ymm1, yword [rax] ; c4 e2 75 3a 08 +vpminuw ymm1, ymm2, ymm3 ; c4 e2 6d 3a cb +vpminuw ymm1, ymm2, yword [rax] ; c4 e2 6d 3a 08 + +vpminud ymm1, ymm3 ; c4 e2 75 3b cb +vpminud ymm1, yword [rax] ; c4 e2 75 3b 08 +vpminud ymm1, ymm2, ymm3 ; c4 e2 6d 3b cb +vpminud ymm1, ymm2, yword [rax] ; c4 e2 6d 3b 08 + +vpmovmskb eax, ymm1 ; c5 fd d7 c1 +vpmovmskb rax, ymm1 ; c5 fd d7 c1 + +vpmovsxbw ymm1, xmm2 ; c4 e2 7d 20 ca +vpmovsxbw ymm1, [rax] ; c4 e2 7d 20 08 +vpmovsxbw ymm1, oword [rax] ; c4 e2 7d 20 08 + +vpmovsxbd ymm1, xmm2 ; c4 e2 7d 21 ca +vpmovsxbd ymm1, [rax] ; c4 e2 7d 21 08 +vpmovsxbd ymm1, qword [rax] ; c4 e2 7d 21 08 + +vpmovsxbq ymm1, xmm2 ; c4 e2 7d 22 ca +vpmovsxbq ymm1, [rax] ; c4 e2 7d 22 08 +vpmovsxbq ymm1, dword [rax] ; c4 e2 7d 22 08 + +vpmovsxwd ymm1, xmm2 ; c4 e2 7d 23 ca +vpmovsxwd ymm1, [rax] ; c4 e2 7d 23 08 +vpmovsxwd ymm1, oword [rax] ; c4 e2 7d 23 08 + +vpmovsxwq ymm1, xmm2 ; c4 e2 7d 24 ca +vpmovsxwq ymm1, [rax] ; c4 e2 7d 24 08 +vpmovsxwq ymm1, qword [rax] ; c4 e2 7d 24 08 + +vpmovsxdq ymm1, xmm2 ; c4 e2 7d 25 ca +vpmovsxdq ymm1, [rax] ; c4 e2 7d 25 08 
+vpmovsxdq ymm1, oword [rax] ; c4 e2 7d 25 08 + +vpmovzxbw ymm1, xmm2 ; c4 e2 7d 30 ca +vpmovzxbw ymm1, [rax] ; c4 e2 7d 30 08 +vpmovzxbw ymm1, oword [rax] ; c4 e2 7d 30 08 + +vpmovzxbd ymm1, xmm2 ; c4 e2 7d 31 ca +vpmovzxbd ymm1, [rax] ; c4 e2 7d 31 08 +vpmovzxbd ymm1, qword [rax] ; c4 e2 7d 31 08 + +vpmovzxbq ymm1, xmm2 ; c4 e2 7d 32 ca +vpmovzxbq ymm1, [rax] ; c4 e2 7d 32 08 +vpmovzxbq ymm1, dword [rax] ; c4 e2 7d 32 08 + +vpmovzxwd ymm1, xmm2 ; c4 e2 7d 33 ca +vpmovzxwd ymm1, [rax] ; c4 e2 7d 33 08 +vpmovzxwd ymm1, oword [rax] ; c4 e2 7d 33 08 + +vpmovzxwq ymm1, xmm2 ; c4 e2 7d 34 ca +vpmovzxwq ymm1, [rax] ; c4 e2 7d 34 08 +vpmovzxwq ymm1, qword [rax] ; c4 e2 7d 34 08 + +vpmovzxdq ymm1, xmm2 ; c4 e2 7d 35 ca +vpmovzxdq ymm1, [rax] ; c4 e2 7d 35 08 +vpmovzxdq ymm1, oword [rax] ; c4 e2 7d 35 08 + +vpmuldq ymm1, ymm3 ; c4 e2 75 28 cb +vpmuldq ymm1, yword [rax] ; c4 e2 75 28 08 +vpmuldq ymm1, ymm2, ymm3 ; c4 e2 6d 28 cb +vpmuldq ymm1, ymm2, yword [rax] ; c4 e2 6d 28 08 + +vpmulhrsw ymm1, ymm3 ; c4 e2 75 0b cb +vpmulhrsw ymm1, yword [rax] ; c4 e2 75 0b 08 +vpmulhrsw ymm1, ymm2, ymm3 ; c4 e2 6d 0b cb +vpmulhrsw ymm1, ymm2, yword [rax] ; c4 e2 6d 0b 08 + +vpmulhuw ymm1, ymm3 ; c5 f5 e4 cb +vpmulhuw ymm1, yword [rax] ; c5 f5 e4 08 +vpmulhuw ymm1, ymm2, ymm3 ; c5 ed e4 cb +vpmulhuw ymm1, ymm2, yword [rax] ; c5 ed e4 08 + +vpmulhw ymm1, ymm3 ; c5 f5 e5 cb +vpmulhw ymm1, yword [rax] ; c5 f5 e5 08 +vpmulhw ymm1, ymm2, ymm3 ; c5 ed e5 cb +vpmulhw ymm1, ymm2, yword [rax] ; c5 ed e5 08 + +vpmullw ymm1, ymm3 ; c5 f5 d5 cb +vpmullw ymm1, yword [rax] ; c5 f5 d5 08 +vpmullw ymm1, ymm2, ymm3 ; c5 ed d5 cb +vpmullw ymm1, ymm2, yword [rax] ; c5 ed d5 08 + +vpmulld ymm1, ymm3 ; c4 e2 75 40 cb +vpmulld ymm1, yword [rax] ; c4 e2 75 40 08 +vpmulld ymm1, ymm2, ymm3 ; c4 e2 6d 40 cb +vpmulld ymm1, ymm2, yword [rax] ; c4 e2 6d 40 08 + +vpmuludq ymm1, ymm3 ; c5 f5 f4 cb +vpmuludq ymm1, yword [rax] ; c5 f5 f4 08 +vpmuludq ymm1, ymm2, ymm3 ; c5 ed f4 cb +vpmuludq ymm1, ymm2, yword [rax] ; c5 ed f4 08 + +vpor ymm1, ymm3 ; c5 f5 eb cb +vpor ymm1, yword [rax] ; c5 f5 eb 08 +vpor ymm1, ymm2, ymm3 ; c5 ed eb cb +vpor ymm1, ymm2, yword [rax] ; c5 ed eb 08 + +vpsadbw ymm1, ymm3 ; c5 f5 f6 cb +vpsadbw ymm1, yword [rax] ; c5 f5 f6 08 +vpsadbw ymm1, ymm2, ymm3 ; c5 ed f6 cb +vpsadbw ymm1, ymm2, yword [rax] ; c5 ed f6 08 + +vpshufb ymm1, ymm3 ; c4 e2 75 00 cb +vpshufb ymm1, yword [rax] ; c4 e2 75 00 08 +vpshufb ymm1, ymm2, ymm3 ; c4 e2 6d 00 cb +vpshufb ymm1, ymm2, yword [rax] ; c4 e2 6d 00 08 + +vpshufd ymm1, ymm3, 3 ; c5 fd 70 cb 03 +vpshufd ymm1, yword [rax], 3 ; c5 fd 70 08 03 + +vpshufhw ymm1, ymm3, 3 ; c5 fe 70 cb 03 +vpshufhw ymm1, yword [rax], 3 ; c5 fe 70 08 03 + +vpshuflw ymm1, ymm3, 3 ; c5 ff 70 cb 03 +vpshuflw ymm1, yword [rax], 3 ; c5 ff 70 08 03 + +vpsignb ymm1, ymm3 ; c4 e2 75 08 cb +vpsignb ymm1, yword [rax] ; c4 e2 75 08 08 +vpsignb ymm1, ymm2, ymm3 ; c4 e2 6d 08 cb +vpsignb ymm1, ymm2, yword [rax] ; c4 e2 6d 08 08 + +vpsignw ymm1, ymm3 ; c4 e2 75 09 cb +vpsignw ymm1, yword [rax] ; c4 e2 75 09 08 +vpsignw ymm1, ymm2, ymm3 ; c4 e2 6d 09 cb +vpsignw ymm1, ymm2, yword [rax] ; c4 e2 6d 09 08 + +vpsignd ymm1, ymm3 ; c4 e2 75 0a cb +vpsignd ymm1, yword [rax] ; c4 e2 75 0a 08 +vpsignd ymm1, ymm2, ymm3 ; c4 e2 6d 0a cb +vpsignd ymm1, ymm2, yword [rax] ; c4 e2 6d 0a 08 + +vpslldq ymm1, 3 ; c5 f5 73 f9 03 +vpslldq ymm1, ymm2, 3 ; c5 f5 73 fa 03 + +vpsllw ymm1, xmm3 ; c5 f5 f1 cb +vpsllw ymm1, oword [rax] ; c5 f5 f1 08 +vpsllw ymm1, 3 ; c5 f5 71 f1 03 +vpsllw ymm1, ymm2, xmm3 ; c5 ed f1 cb +vpsllw ymm1, ymm2, oword [rax] 
; c5 ed f1 08 +vpsllw ymm1, ymm2, 3 ; c5 f5 71 f2 03 + +vpslld ymm1, xmm3 ; c5 f5 f2 cb +vpslld ymm1, oword [rax] ; c5 f5 f2 08 +vpslld ymm1, 3 ; c5 f5 72 f1 03 +vpslld ymm1, ymm2, xmm3 ; c5 ed f2 cb +vpslld ymm1, ymm2, oword [rax] ; c5 ed f2 08 +vpslld ymm1, ymm2, 3 ; c5 f5 72 f2 03 + +vpsllq ymm1, xmm3 ; c5 f5 f3 cb +vpsllq ymm1, oword [rax] ; c5 f5 f3 08 +vpsllq ymm1, 3 ; c5 f5 73 f1 03 +vpsllq ymm1, ymm2, xmm3 ; c5 ed f3 cb +vpsllq ymm1, ymm2, oword [rax] ; c5 ed f3 08 +vpsllq ymm1, ymm2, 3 ; c5 f5 73 f2 03 + +vpsraw ymm1, xmm3 ; c5 f5 e1 cb +vpsraw ymm1, oword [rax] ; c5 f5 e1 08 +vpsraw ymm1, 3 ; c5 f5 71 e1 03 +vpsraw ymm1, ymm2, xmm3 ; c5 ed e1 cb +vpsraw ymm1, ymm2, oword [rax] ; c5 ed e1 08 +vpsraw ymm1, ymm2, 3 ; c5 f5 71 e2 03 + +vpsrad ymm1, xmm3 ; c5 f5 e2 cb +vpsrad ymm1, oword [rax] ; c5 f5 e2 08 +vpsrad ymm1, 3 ; c5 f5 72 e1 03 +vpsrad ymm1, ymm2, xmm3 ; c5 ed e2 cb +vpsrad ymm1, ymm2, oword [rax] ; c5 ed e2 08 +vpsrad ymm1, ymm2, 3 ; c5 f5 72 e2 03 + +vpsrldq ymm1, 3 ; c5 f5 73 d9 03 +vpsrldq ymm1, ymm2, 3 ; c5 f5 73 da 03 + +vpsrlw ymm1, xmm3 ; c5 f5 d1 cb +vpsrlw ymm1, oword [rax] ; c5 f5 d1 08 +vpsrlw ymm1, 3 ; c5 f5 71 d1 03 +vpsrlw ymm1, ymm2, xmm3 ; c5 ed d1 cb +vpsrlw ymm1, ymm2, oword [rax] ; c5 ed d1 08 +vpsrlw ymm1, ymm2, 3 ; c5 f5 71 d2 03 + +vpsrld ymm1, xmm3 ; c5 f5 d2 cb +vpsrld ymm1, oword [rax] ; c5 f5 d2 08 +vpsrld ymm1, 3 ; c5 f5 72 d1 03 +vpsrld ymm1, ymm2, xmm3 ; c5 ed d2 cb +vpsrld ymm1, ymm2, oword [rax] ; c5 ed d2 08 +vpsrld ymm1, ymm2, 3 ; c5 f5 72 d2 03 + +vpsrld ymm1, xmm3 ; c5 f5 d2 cb +vpsrld ymm1, oword [rax] ; c5 f5 d2 08 +vpsrld ymm1, 3 ; c5 f5 72 d1 03 +vpsrld ymm1, ymm2, xmm3 ; c5 ed d2 cb +vpsrld ymm1, ymm2, oword [rax] ; c5 ed d2 08 +vpsrld ymm1, ymm2, 3 ; c5 f5 72 d2 03 + +vpsubsb ymm1, ymm3 ; c5 f5 e8 cb +vpsubsb ymm1, yword [rax] ; c5 f5 e8 08 +vpsubsb ymm1, ymm2, ymm3 ; c5 ed e8 cb +vpsubsb ymm1, ymm2, yword [rax] ; c5 ed e8 08 + +vpsubsw ymm1, ymm3 ; c5 f5 e9 cb +vpsubsw ymm1, yword [rax] ; c5 f5 e9 08 +vpsubsw ymm1, ymm2, ymm3 ; c5 ed e9 cb +vpsubsw ymm1, ymm2, yword [rax] ; c5 ed e9 08 + +vpsubusb ymm1, ymm3 ; c5 f5 d8 cb +vpsubusb ymm1, yword [rax] ; c5 f5 d8 08 +vpsubusb ymm1, ymm2, ymm3 ; c5 ed d8 cb +vpsubusb ymm1, ymm2, yword [rax] ; c5 ed d8 08 + +vpsubusw ymm1, ymm3 ; c5 f5 d9 cb +vpsubusw ymm1, yword [rax] ; c5 f5 d9 08 +vpsubusw ymm1, ymm2, ymm3 ; c5 ed d9 cb +vpsubusw ymm1, ymm2, yword [rax] ; c5 ed d9 08 + +vpunpckhbw ymm1, ymm3 ; c5 f5 68 cb +vpunpckhbw ymm1, yword [rax] ; c5 f5 68 08 +vpunpckhbw ymm1, ymm2, ymm3 ; c5 ed 68 cb +vpunpckhbw ymm1, ymm2, yword [rax] ; c5 ed 68 08 + +vpunpckhwd ymm1, ymm3 ; c5 f5 69 cb +vpunpckhwd ymm1, yword [rax] ; c5 f5 69 08 +vpunpckhwd ymm1, ymm2, ymm3 ; c5 ed 69 cb +vpunpckhwd ymm1, ymm2, yword [rax] ; c5 ed 69 08 + +vpunpckhdq ymm1, ymm3 ; c5 f5 6a cb +vpunpckhdq ymm1, yword [rax] ; c5 f5 6a 08 +vpunpckhdq ymm1, ymm2, ymm3 ; c5 ed 6a cb +vpunpckhdq ymm1, ymm2, yword [rax] ; c5 ed 6a 08 + +vpunpckhqdq ymm1, ymm3 ; c5 f5 6d cb +vpunpckhqdq ymm1, yword [rax] ; c5 f5 6d 08 +vpunpckhqdq ymm1, ymm2, ymm3 ; c5 ed 6d cb +vpunpckhqdq ymm1, ymm2, yword [rax] ; c5 ed 6d 08 + +vpunpcklbw ymm1, ymm3 ; c5 f5 60 cb +vpunpcklbw ymm1, yword [rax] ; c5 f5 60 08 +vpunpcklbw ymm1, ymm2, ymm3 ; c5 ed 60 cb +vpunpcklbw ymm1, ymm2, yword [rax] ; c5 ed 60 08 + +vpunpcklwd ymm1, ymm3 ; c5 f5 61 cb +vpunpcklwd ymm1, yword [rax] ; c5 f5 61 08 +vpunpcklwd ymm1, ymm2, ymm3 ; c5 ed 61 cb +vpunpcklwd ymm1, ymm2, yword [rax] ; c5 ed 61 08 + +vpunpckldq ymm1, ymm3 ; c5 f5 62 cb +vpunpckldq ymm1, yword [rax] ; c5 f5 
62 08 +vpunpckldq ymm1, ymm2, ymm3 ; c5 ed 62 cb +vpunpckldq ymm1, ymm2, yword [rax] ; c5 ed 62 08 + +vpunpcklqdq ymm1, ymm3 ; c5 f5 6c cb +vpunpcklqdq ymm1, yword [rax] ; c5 f5 6c 08 +vpunpcklqdq ymm1, ymm2, ymm3 ; c5 ed 6c cb +vpunpcklqdq ymm1, ymm2, yword [rax] ; c5 ed 6c 08 + +vpxor ymm1, ymm3 ; c5 f5 ef cb +vpxor ymm1, yword [rax] ; c5 f5 ef 08 +vpxor ymm1, ymm2, ymm3 ; c5 ed ef cb +vpxor ymm1, ymm2, yword [rax] ; c5 ed ef 08 + +vmovntdqa ymm1, yword [rax] ; c4 e2 7d 2a 08 + +vbroadcastss xmm1, xmm2 ; c4 e2 79 18 ca +vbroadcastss ymm1, xmm2 ; c4 e2 7d 18 ca + +vbroadcastsd ymm1, xmm2 ; c4 e2 7d 19 ca + +vbroadcasti128 ymm1, oword [rax] ; c4 e2 7d 5a 08 + +vpblendd ymm1, ymm2, ymm3, 3 ; c4 e3 6d 02 cb 03 +vpblendd ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 02 08 03 + +vpbroadcastb xmm1, xmm2 ; c4 e2 79 78 ca +vpbroadcastb xmm1, byte [rax] ; c4 e2 79 78 08 +vpbroadcastb ymm1, xmm2 ; c4 e2 7d 78 ca +vpbroadcastb ymm1, byte [rax] ; c4 e2 7d 78 08 + +vpbroadcastw xmm1, xmm2 ; c4 e2 79 79 ca +vpbroadcastw xmm1, word [rax] ; c4 e2 79 79 08 +vpbroadcastw ymm1, xmm2 ; c4 e2 7d 79 ca +vpbroadcastw ymm1, word [rax] ; c4 e2 7d 79 08 + +vpbroadcastd xmm1, xmm2 ; c4 e2 79 58 ca +vpbroadcastd xmm1, dword [rax] ; c4 e2 79 58 08 +vpbroadcastd ymm1, xmm2 ; c4 e2 7d 58 ca +vpbroadcastd ymm1, dword [rax] ; c4 e2 7d 58 08 + +vpbroadcastq xmm1, xmm2 ; c4 e2 79 59 ca +vpbroadcastq xmm1, qword [rax] ; c4 e2 79 59 08 +vpbroadcastq ymm1, xmm2 ; c4 e2 7d 59 ca +vpbroadcastq ymm1, qword [rax] ; c4 e2 7d 59 08 + +vpermd ymm1, ymm2, ymm3 ; c4 e2 6d 36 cb +vpermd ymm1, ymm2, yword [rax] ; c4 e2 6d 36 08 + +vpermpd ymm1, ymm2, 3 ; c4 e3 fd 01 ca 03 +vpermpd ymm1, yword [rax], 3 ; c4 e3 fd 01 08 03 + +vpermps ymm1, ymm2, ymm3 ; c4 e2 6d 16 cb +vpermps ymm1, ymm2, yword [rax] ; c4 e2 6d 16 08 + +vpermq ymm1, ymm2, 3 ; c4 e3 fd 00 ca 03 +vpermq ymm1, yword [rax], 3 ; c4 e3 fd 00 08 03 + +vperm2i128 ymm1, ymm2, ymm3, 3 ; c4 e3 6d 46 cb 03 +vperm2i128 ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 46 08 03 + +vextracti128 xmm1, ymm2, 3 ; c4 e3 7d 39 d1 03 +vextracti128 oword [rax], ymm2, 3 ; c4 e3 7d 39 10 03 + +vinserti128 ymm1, ymm2, xmm3, 3 ; c4 e3 6d 38 cb 03 +vinserti128 ymm1, ymm2, oword [rax], 3 ; c4 e3 6d 38 08 03 + +vpmaskmovd xmm1, xmm2, oword [rax] ; c4 e2 69 8c 08 +vpmaskmovd ymm1, ymm2, yword [rax] ; c4 e2 6d 8c 08 +vpmaskmovd oword [rax], xmm1, xmm2 ; c4 e2 71 8e 10 +vpmaskmovd yword [rax], ymm1, ymm2 ; c4 e2 75 8e 10 + +vpmaskmovq xmm1, xmm2, oword [rax] ; c4 e2 e9 8c 08 +vpmaskmovq ymm1, ymm2, yword [rax] ; c4 e2 ed 8c 08 +vpmaskmovq oword [rax], xmm1, xmm2 ; c4 e2 f1 8e 10 +vpmaskmovq yword [rax], ymm1, ymm2 ; c4 e2 f5 8e 10 + +vpsllvd xmm1, xmm2, xmm3 ; c4 e2 69 47 cb +vpsllvd xmm1, xmm2, oword [rax] ; c4 e2 69 47 08 +vpsllvd ymm1, ymm2, ymm3 ; c4 e2 6d 47 cb +vpsllvd ymm1, ymm2, yword [rax] ; c4 e2 6d 47 08 + +vpsllvq xmm1, xmm2, xmm3 ; c4 e2 e9 47 cb +vpsllvq xmm1, xmm2, oword [rax] ; c4 e2 e9 47 08 +vpsllvq ymm1, ymm2, ymm3 ; c4 e2 ed 47 cb +vpsllvq ymm1, ymm2, yword [rax] ; c4 e2 ed 47 08 + +vpsravd xmm1, xmm2, xmm3 ; c4 e2 69 46 cb +vpsravd xmm1, xmm2, oword [rax] ; c4 e2 69 46 08 +vpsravd ymm1, ymm2, ymm3 ; c4 e2 6d 46 cb +vpsravd ymm1, ymm2, yword [rax] ; c4 e2 6d 46 08 + +vpsrlvd xmm1, xmm2, xmm3 ; c4 e2 69 45 cb +vpsrlvd xmm1, xmm2, oword [rax] ; c4 e2 69 45 08 +vpsrlvd ymm1, ymm2, ymm3 ; c4 e2 6d 45 cb +vpsrlvd ymm1, ymm2, yword [rax] ; c4 e2 6d 45 08 + +vpsrlvq xmm1, xmm2, xmm3 ; c4 e2 e9 45 cb +vpsrlvq xmm1, xmm2, oword [rax] ; c4 e2 e9 45 08 +vpsrlvq ymm1, ymm2, ymm3 ; c4 e2 ed 45 cb +vpsrlvq ymm1, ymm2, 
diff --git a/modules/arch/x86/tests/avx2.hex b/modules/arch/x86/tests/avx2.hex
new file mode 100644
index 00000000..3d9e9cd4
--- /dev/null
+++ b/modules/arch/x86/tests/avx2.hex
@@ -0,0 +1,2105 @@
+c4
+e3
+75
+42
+cb
+03
[...2,099 further added lines: the remaining expected opcode bytes, one hex byte per line, in the same order as the encodings listed in the avx2.asm comments above...]
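Each .hex file is the golden reference for its .asm twin: one expected output byte per line, compared byte-for-byte against yasm's assembled output by the test harness. Since avx2.asm also carries the expected bytes in its comments, the two files can be cross-checked with a few lines of Python (a standalone sketch; the helper names and the comment-scraping approach are assumptions, not part of the patch):

    import re

    def bytes_from_asm_comments(path):
        # Scrape trailing '; xx yy zz' byte comments from the .asm test file.
        pat = re.compile(r';\s*([0-9a-f]{2}(?:\s+[0-9a-f]{2})*)\s*$')
        out = []
        with open(path) as f:
            for line in f:
                m = pat.search(line)
                if m:
                    out.extend(m.group(1).split())
        return out

    def bytes_from_hex_file(path):
        # The .hex golden file: one lowercase hex byte per line.
        with open(path) as f:
            return [ln.strip() for ln in f if ln.strip()]

    asm = bytes_from_asm_comments("modules/arch/x86/tests/avx2.asm")
    gold = bytes_from_hex_file("modules/arch/x86/tests/avx2.hex")
    assert asm == gold, "avx2.asm comments and avx2.hex disagree"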