author    Peter Johnson <peter@tortall.net>    2011-07-03 14:36:37 -0700
committer Peter Johnson <peter@tortall.net>    2011-07-03 14:36:37 -0700
commit    d779fcb04e7b47b6054483a498ec3ad77428bb24 (patch)
tree      eabe708b9347a6354018ee4f0aacc194795bb32c /modules
parent    fc7724a3df1dd3b65317f66547573939a1f269e6 (diff)
download  yasm-d779fcb04e7b47b6054483a498ec3ad77428bb24.tar.gz
Add most Intel AVX2 instructions.

Reference: http://www.intel.com/software/avx rev11 spec

This is all AVX2 instructions except for VGATHER*/VPGATHER*, which require
additional ModRM handling.

Portions contributed by: Mark Charney <mark.charney@intel.com>

Part of [#227].
Diffstat (limited to 'modules')
-rwxr-xr-x  modules/arch/x86/gen_x86_insn.py       679
-rw-r--r--  modules/arch/x86/tests/Makefile.inc      2
-rw-r--r--  modules/arch/x86/tests/avx2.asm        612
-rw-r--r--  modules/arch/x86/tests/avx2.hex       2105
4 files changed, 3280 insertions, 118 deletions
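
Most of the gen_x86_insn.py changes below follow a single pattern: instruction forms that were registered against 128-bit-only groups (e.g. xmm_xmm128, sse4imm) are re-pointed at new *_256avx2 groups whose 256-bit YMM forms carry cpu=["AVX2"], and several groups (vpshift, avx2_ssse3_2op, the vpbroadcast* groups) are now emitted from a loop over the 128- and 256-bit sizes. The sketch below only illustrates that pattern with simplified stand-in helpers; add_group and add_insn here are stubs, not the real generator API from gen_x86_insn.py.

# Simplified stand-ins for the generator helpers (illustration only; the
# real add_group/add_insn in gen_x86_insn.py build encoding tables).
GROUPS = {}

def add_group(name, **form):
    # Collect one encoding form under a named instruction group.
    GROUPS.setdefault(name, []).append(form)

def add_insn(name, group, modifiers=None):
    # Bind a mnemonic to a group; modifiers select prefix/opcode bytes.
    print("%-10s -> %-22s %r" % (name, group, modifiers))

# The loop pattern used for groups such as vpshift and avx2_ssse3_2op:
# emit the AVX 128-bit form and the AVX2 256-bit form in one pass.
for cpu, sz in zip(["AVX", "AVX2"], [128, 256]):
    add_group("avx2_ssse3_2op",
              cpu=[cpu], vex=sz, prefix=0x66,
              opcode=[0x0F, 0x38, 0x00],
              operands=[("SIMDReg", sz, "Spare"), ("SIMDRM", sz, "EA")])

# Mnemonics are then simply re-pointed at the AVX2-capable group.
add_insn("vpabsb", "avx2_ssse3_2op", modifiers=[0x1C])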
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index ded19ea5..5dec9e81 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -3997,44 +3997,44 @@ add_insn("punpckldq", "mmxsse2", modifiers=[0x62])
add_insn("pxor", "mmxsse2", modifiers=[0xEF])
# AVX versions don't support the MMX registers
-add_insn("vpackssdw", "xmm_xmm128", modifiers=[0x66, 0x6B, VEXL0], avx=True)
-add_insn("vpacksswb", "xmm_xmm128", modifiers=[0x66, 0x63, VEXL0], avx=True)
-add_insn("vpackuswb", "xmm_xmm128", modifiers=[0x66, 0x67, VEXL0], avx=True)
-add_insn("vpaddb", "xmm_xmm128", modifiers=[0x66, 0xFC, VEXL0], avx=True)
-add_insn("vpaddw", "xmm_xmm128", modifiers=[0x66, 0xFD, VEXL0], avx=True)
-add_insn("vpaddd", "xmm_xmm128", modifiers=[0x66, 0xFE, VEXL0], avx=True)
-add_insn("vpaddq", "xmm_xmm128", modifiers=[0x66, 0xD4, VEXL0], avx=True)
-add_insn("vpaddsb", "xmm_xmm128", modifiers=[0x66, 0xEC, VEXL0], avx=True)
-add_insn("vpaddsw", "xmm_xmm128", modifiers=[0x66, 0xED, VEXL0], avx=True)
-add_insn("vpaddusb", "xmm_xmm128", modifiers=[0x66, 0xDC, VEXL0], avx=True)
-add_insn("vpaddusw", "xmm_xmm128", modifiers=[0x66, 0xDD, VEXL0], avx=True)
-add_insn("vpand", "xmm_xmm128", modifiers=[0x66, 0xDB, VEXL0], avx=True)
-add_insn("vpandn", "xmm_xmm128", modifiers=[0x66, 0xDF, VEXL0], avx=True)
-add_insn("vpcmpeqb", "xmm_xmm128", modifiers=[0x66, 0x74, VEXL0], avx=True)
-add_insn("vpcmpeqw", "xmm_xmm128", modifiers=[0x66, 0x75, VEXL0], avx=True)
-add_insn("vpcmpeqd", "xmm_xmm128", modifiers=[0x66, 0x76, VEXL0], avx=True)
-add_insn("vpcmpgtb", "xmm_xmm128", modifiers=[0x66, 0x64, VEXL0], avx=True)
-add_insn("vpcmpgtw", "xmm_xmm128", modifiers=[0x66, 0x65, VEXL0], avx=True)
-add_insn("vpcmpgtd", "xmm_xmm128", modifiers=[0x66, 0x66, VEXL0], avx=True)
-add_insn("vpmaddwd", "xmm_xmm128", modifiers=[0x66, 0xF5, VEXL0], avx=True)
-add_insn("vpmulhw", "xmm_xmm128", modifiers=[0x66, 0xE5, VEXL0], avx=True)
-add_insn("vpmullw", "xmm_xmm128", modifiers=[0x66, 0xD5, VEXL0], avx=True)
-add_insn("vpor", "xmm_xmm128", modifiers=[0x66, 0xEB, VEXL0], avx=True)
-add_insn("vpsubb", "xmm_xmm128", modifiers=[0x66, 0xF8, VEXL0], avx=True)
-add_insn("vpsubw", "xmm_xmm128", modifiers=[0x66, 0xF9, VEXL0], avx=True)
-add_insn("vpsubd", "xmm_xmm128", modifiers=[0x66, 0xFA, VEXL0], avx=True)
-add_insn("vpsubq", "xmm_xmm128", modifiers=[0x66, 0xFB, VEXL0], avx=True)
-add_insn("vpsubsb", "xmm_xmm128", modifiers=[0x66, 0xE8, VEXL0], avx=True)
-add_insn("vpsubsw", "xmm_xmm128", modifiers=[0x66, 0xE9, VEXL0], avx=True)
-add_insn("vpsubusb", "xmm_xmm128", modifiers=[0x66, 0xD8, VEXL0], avx=True)
-add_insn("vpsubusw", "xmm_xmm128", modifiers=[0x66, 0xD9, VEXL0], avx=True)
-add_insn("vpunpckhbw", "xmm_xmm128", modifiers=[0x66, 0x68, VEXL0], avx=True)
-add_insn("vpunpckhwd", "xmm_xmm128", modifiers=[0x66, 0x69, VEXL0], avx=True)
-add_insn("vpunpckhdq", "xmm_xmm128", modifiers=[0x66, 0x6A, VEXL0], avx=True)
-add_insn("vpunpcklbw", "xmm_xmm128", modifiers=[0x66, 0x60, VEXL0], avx=True)
-add_insn("vpunpcklwd", "xmm_xmm128", modifiers=[0x66, 0x61, VEXL0], avx=True)
-add_insn("vpunpckldq", "xmm_xmm128", modifiers=[0x66, 0x62, VEXL0], avx=True)
-add_insn("vpxor", "xmm_xmm128", modifiers=[0x66, 0xEF, VEXL0], avx=True)
+add_insn("vpackssdw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6B, VEXL0], avx=True)
+add_insn("vpacksswb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x63, VEXL0], avx=True)
+add_insn("vpackuswb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x67, VEXL0], avx=True)
+add_insn("vpaddb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFC, VEXL0], avx=True)
+add_insn("vpaddw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFD, VEXL0], avx=True)
+add_insn("vpaddd", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFE, VEXL0], avx=True)
+add_insn("vpaddq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD4, VEXL0], avx=True)
+add_insn("vpaddsb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEC, VEXL0], avx=True)
+add_insn("vpaddsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xED, VEXL0], avx=True)
+add_insn("vpaddusb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDC, VEXL0], avx=True)
+add_insn("vpaddusw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDD, VEXL0], avx=True)
+add_insn("vpand", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDB, VEXL0], avx=True)
+add_insn("vpandn", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDF, VEXL0], avx=True)
+add_insn("vpcmpeqb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x74, VEXL0], avx=True)
+add_insn("vpcmpeqw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x75, VEXL0], avx=True)
+add_insn("vpcmpeqd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x76, VEXL0], avx=True)
+add_insn("vpcmpgtb", "xmm_xmm128_256avx2", modifiers=[0x66, 0x64, VEXL0], avx=True)
+add_insn("vpcmpgtw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x65, VEXL0], avx=True)
+add_insn("vpcmpgtd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x66, VEXL0], avx=True)
+add_insn("vpmaddwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF5, VEXL0], avx=True)
+add_insn("vpmulhw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE5, VEXL0], avx=True)
+add_insn("vpmullw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD5, VEXL0], avx=True)
+add_insn("vpor", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEB, VEXL0], avx=True)
+add_insn("vpsubb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF8, VEXL0], avx=True)
+add_insn("vpsubw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF9, VEXL0], avx=True)
+add_insn("vpsubd", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFA, VEXL0], avx=True)
+add_insn("vpsubq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xFB, VEXL0], avx=True)
+add_insn("vpsubsb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE8, VEXL0], avx=True)
+add_insn("vpsubsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE9, VEXL0], avx=True)
+add_insn("vpsubusb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD8, VEXL0], avx=True)
+add_insn("vpsubusw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xD9, VEXL0], avx=True)
+add_insn("vpunpckhbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x68, VEXL0], avx=True)
+add_insn("vpunpckhwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x69, VEXL0], avx=True)
+add_insn("vpunpckhdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6A, VEXL0], avx=True)
+add_insn("vpunpcklbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x60, VEXL0], avx=True)
+add_insn("vpunpcklwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x61, VEXL0], avx=True)
+add_insn("vpunpckldq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x62, VEXL0], avx=True)
+add_insn("vpxor", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEF, VEXL0], avx=True)
add_group("pshift",
cpu=["MMX"],
@@ -4075,42 +4075,43 @@ add_insn("psrld", "pshift", modifiers=[0xD2, 0x72, 2])
add_insn("psrlq", "pshift", modifiers=[0xD3, 0x73, 2])
# Ran out of modifiers, so AVX has to be separate
-add_group("vpshift",
- cpu=["AVX"],
- modifiers=["Op1Add"],
- vex=128,
- prefix=0x66,
- opcode=[0x0F, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
- Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-add_group("vpshift",
- cpu=["AVX"],
- modifiers=["Gap", "Op1Add", "SpAdd"],
- vex=128,
- prefix=0x66,
- opcode=[0x0F, 0x00],
- spare=0,
- operands=[Operand(type="SIMDReg", size=128, dest="EAVEX"),
- Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
-add_group("vpshift",
- cpu=["AVX"],
- modifiers=["Op1Add"],
- vex=128,
- prefix=0x66,
- opcode=[0x0F, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDReg", size=128, dest="VEX"),
- Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-add_group("vpshift",
- cpu=["AVX"],
- modifiers=["Gap", "Op1Add", "SpAdd"],
- vex=128,
- prefix=0x66,
- opcode=[0x0F, 0x00],
- spare=0,
- operands=[Operand(type="SIMDReg", size=128, dest="VEX"),
- Operand(type="SIMDReg", size=128, dest="EA"),
- Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+for cpu, sz in zip(["AVX", "AVX2"], [128, 256]):
+ add_group("vpshift",
+ cpu=[cpu],
+ modifiers=["Op1Add"],
+ vex=sz,
+ prefix=0x66,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+ add_group("vpshift",
+ cpu=[cpu],
+ modifiers=["Gap", "Op1Add", "SpAdd"],
+ vex=sz,
+ prefix=0x66,
+ opcode=[0x0F, 0x00],
+ spare=0,
+ operands=[Operand(type="SIMDReg", size=sz, dest="EAVEX"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+ add_group("vpshift",
+ cpu=[cpu],
+ modifiers=["Op1Add"],
+ vex=sz,
+ prefix=0x66,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+ add_group("vpshift",
+ cpu=[cpu],
+ modifiers=["Gap", "Op1Add", "SpAdd"],
+ vex=sz,
+ prefix=0x66,
+ opcode=[0x0F, 0x00],
+ spare=0,
+ operands=[Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDReg", size=sz, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
add_insn("vpsllw", "vpshift", modifiers=[0xF1, 0x71, 6])
add_insn("vpslld", "vpshift", modifiers=[0xF2, 0x72, 6])
@@ -4134,14 +4135,14 @@ add_insn("pmulhuw", "mmxsse2", modifiers=[0xE4], cpu=["P3", "MMX"])
add_insn("psadbw", "mmxsse2", modifiers=[0xF6], cpu=["P3", "MMX"])
# AVX versions don't support MMX register
-add_insn("vpavgb", "xmm_xmm128", modifiers=[0x66, 0xE0, VEXL0], avx=True)
-add_insn("vpavgw", "xmm_xmm128", modifiers=[0x66, 0xE3, VEXL0], avx=True)
-add_insn("vpmaxsw", "xmm_xmm128", modifiers=[0x66, 0xEE, VEXL0], avx=True)
-add_insn("vpmaxub", "xmm_xmm128", modifiers=[0x66, 0xDE, VEXL0], avx=True)
-add_insn("vpminsw", "xmm_xmm128", modifiers=[0x66, 0xEA, VEXL0], avx=True)
-add_insn("vpminub", "xmm_xmm128", modifiers=[0x66, 0xDA, VEXL0], avx=True)
-add_insn("vpmulhuw", "xmm_xmm128", modifiers=[0x66, 0xE4, VEXL0], avx=True)
-add_insn("vpsadbw", "xmm_xmm128", modifiers=[0x66, 0xF6, VEXL0], avx=True)
+add_insn("vpavgb", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE0, VEXL0], avx=True)
+add_insn("vpavgw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE3, VEXL0], avx=True)
+add_insn("vpmaxsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEE, VEXL0], avx=True)
+add_insn("vpmaxub", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDE, VEXL0], avx=True)
+add_insn("vpminsw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xEA, VEXL0], avx=True)
+add_insn("vpminub", "xmm_xmm128_256avx2", modifiers=[0x66, 0xDA, VEXL0], avx=True)
+add_insn("vpmulhuw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE4, VEXL0], avx=True)
+add_insn("vpsadbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF6, VEXL0], avx=True)
add_insn("prefetchnta", "twobytemem", modifiers=[0, 0x0F, 0x18], cpu=["P3"])
add_insn("prefetcht0", "twobytemem", modifiers=[1, 0x0F, 0x18], cpu=["P3"])
@@ -4172,6 +4173,49 @@ add_group("xmm_xmm128_256",
vex=256,
prefix=0x00,
opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256",
+ cpu=["AVX"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=256,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+
+# Same as above, except 256-bit version only available in AVX2
+add_group("xmm_xmm128_256avx2",
+ cpu=["SSE"],
+ modifiers=["PreAdd", "Op1Add", "SetVEX"],
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256avx2",
+ cpu=["AVX"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=128,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="VEX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256avx2",
+ cpu=["AVX2"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=256,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256avx2",
+ cpu=["AVX2"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=256,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="SIMDReg", size=256, dest="VEX"),
Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
@@ -4526,6 +4570,23 @@ add_group("xmm_xmm128_imm",
add_insn("cmpps", "xmm_xmm128_imm", modifiers=[0, 0xC2])
add_insn("shufps", "xmm_xmm128_imm", modifiers=[0, 0xC6])
+# YMM register AVX2 version of above
+add_group("xmm_xmm128_imm_256avx2",
+ cpu=["SSE"],
+ modifiers=["PreAdd", "Op1Add", "SetVEX"],
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("xmm_xmm128_imm_256avx2",
+ cpu=["AVX2"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=256,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
# YMM register and 4-operand version of above
add_group("xmm_xmm128_imm_256",
cpu=["SSE"],
@@ -4991,6 +5052,14 @@ add_group("pmovmskb",
operands=[Operand(type="Reg", size=32, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="EA")])
add_group("pmovmskb",
+ suffix="l",
+ cpu=["AVX2"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0xD7],
+ operands=[Operand(type="Reg", size=32, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="EA")])
+add_group("pmovmskb",
suffix="q",
cpu=["MMX", "P3"],
notavx=True,
@@ -5009,6 +5078,16 @@ add_group("pmovmskb",
opcode=[0x0F, 0xD7],
operands=[Operand(type="Reg", size=64, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("pmovmskb",
+ suffix="q",
+ cpu=["SSE2"],
+ vex=256,
+ opersize=64,
+ def_opersize_64=64,
+ prefix=0x66,
+ opcode=[0x0F, 0xD7],
+ operands=[Operand(type="Reg", size=64, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="EA")])
add_insn("pmovmskb", "pmovmskb")
add_insn("vpmovmskb", "pmovmskb", modifiers=[VEXL0], avx=True)
@@ -5419,12 +5498,12 @@ add_insn("punpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C], cpu=["SSE2"])
add_insn("vcvttsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2C, VEXL0], avx=True)
# vcvttpd2dq takes xmm, ymm combination
# vcvttps2dq is two-operand
-add_insn("vpmuludq", "xmm_xmm128", modifiers=[0x66, 0xF4, VEXL0], avx=True)
-add_insn("vpshufd", "xmm_xmm128_imm", modifiers=[0x66, 0x70, VEXL0], avx=True)
-add_insn("vpshufhw", "xmm_xmm128_imm", modifiers=[0xF3, 0x70, VEXL0], avx=True)
-add_insn("vpshuflw", "xmm_xmm128_imm", modifiers=[0xF2, 0x70, VEXL0], avx=True)
-add_insn("vpunpckhqdq", "xmm_xmm128", modifiers=[0x66, 0x6D, VEXL0], avx=True)
-add_insn("vpunpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C, VEXL0], avx=True)
+add_insn("vpmuludq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF4, VEXL0], avx=True)
+add_insn("vpshufd", "xmm_xmm128_imm_256avx2", modifiers=[0x66, 0x70, VEXL0], avx=True)
+add_insn("vpshufhw", "xmm_xmm128_imm_256avx2", modifiers=[0xF3, 0x70, VEXL0], avx=True)
+add_insn("vpshuflw", "xmm_xmm128_imm_256avx2", modifiers=[0xF2, 0x70, VEXL0], avx=True)
+add_insn("vpunpckhqdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6D, VEXL0], avx=True)
+add_insn("vpunpcklqdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6C, VEXL0], avx=True)
add_insn("cvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A], cpu=["SSE2"])
add_insn("vcvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A, VEXL0], avx=True)
@@ -5480,6 +5559,25 @@ add_group("pslrldq",
operands=[Operand(type="SIMDReg", size=128, dest="VEX"),
Operand(type="SIMDReg", size=128, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pslrldq",
+ cpu=["AVX2"],
+ modifiers=["SpAdd"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x73],
+ spare=0,
+ operands=[Operand(type="SIMDReg", size=256, dest="EAVEX"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pslrldq",
+ cpu=["AVX2"],
+ modifiers=["SpAdd"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x73],
+ spare=0,
+ operands=[Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDReg", size=256, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
add_insn("pslldq", "pslrldq", modifiers=[7])
add_insn("psrldq", "pslrldq", modifiers=[3])
@@ -5556,6 +5654,23 @@ add_group("ssse3",
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="VEX"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("ssse3",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+add_group("ssse3",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
add_insn("pshufb", "ssse3", modifiers=[0x00])
add_insn("phaddw", "ssse3", modifiers=[0x01])
@@ -5604,7 +5719,7 @@ add_group("ssse3imm",
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
add_insn("palignr", "ssse3imm", modifiers=[0x0F])
-add_insn("vpalignr", "sse4imm", modifiers=[0x0F, VEXL0], avx=True)
+add_insn("vpalignr", "sse4imm_256avx2", modifiers=[0x0F, VEXL0], avx=True)
#####################################################################
# SSE4.1 / SSE4.2 instructions
@@ -5684,6 +5799,54 @@ add_group("sse4imm_256",
vex=256,
prefix=0x66,
opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256",
+ cpu=["AVX"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+# Same as above except AVX2 required for 256-bit.
+add_group("sse4imm_256avx2",
+ cpu=["SSE41"],
+ modifiers=["Op2Add", "SetVEX"],
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256avx2",
+ cpu=["AVX"],
+ modifiers=["Op2Add"],
+ vex=128,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="VEX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="SIMDReg", size=256, dest="VEX"),
Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
@@ -5758,13 +5921,14 @@ add_insn("roundps", "sse4imm", modifiers=[0x08])
add_insn("roundsd", "sse4m64imm", modifiers=[0x0B])
add_insn("roundss", "sse4m32imm", modifiers=[0x0A])
-# vdppd, vmpsadbw, and vpblendw do not allow YMM registers
+# vdppd does not allow YMM registers
+# vmpsadbw and vpblendw do not allow YMM registers unless AVX2
add_insn("vblendpd", "sse4imm_256", modifiers=[0x0D, VEXL0], avx=True)
add_insn("vblendps", "sse4imm_256", modifiers=[0x0C, VEXL0], avx=True)
add_insn("vdppd", "sse4imm", modifiers=[0x41, VEXL0], avx=True)
add_insn("vdpps", "sse4imm_256", modifiers=[0x40, VEXL0], avx=True)
-add_insn("vmpsadbw", "sse4imm", modifiers=[0x42, VEXL0], avx=True)
-add_insn("vpblendw", "sse4imm", modifiers=[0x0E, VEXL0], avx=True)
+add_insn("vmpsadbw", "sse4imm_256avx2", modifiers=[0x42, VEXL0], avx=True)
+add_insn("vpblendw", "sse4imm_256avx2", modifiers=[0x0E, VEXL0], avx=True)
# vroundpd and vroundps don't add another register operand
add_insn("vroundsd", "sse4m64imm", modifiers=[0x0B, VEXL0], avx=True)
add_insn("vroundss", "sse4m32imm", modifiers=[0x0A, VEXL0], avx=True)
@@ -5814,9 +5978,9 @@ add_group("avx_sse4xmm0",
add_insn("vblendvpd", "avx_sse4xmm0", modifiers=[0x4B])
add_insn("vblendvps", "avx_sse4xmm0", modifiers=[0x4A])
-# vpblendvb doesn't have a 256-bit form
-add_group("avx_sse4xmm0_128",
- cpu=["AVX"],
+# vpblendvb didn't have a 256-bit form until AVX2
+add_group("avx2_sse4xmm0",
+ cpu=["AVX2"],
modifiers=["Op2Add"],
vex=128,
prefix=0x66,
@@ -5825,8 +5989,18 @@ add_group("avx_sse4xmm0_128",
Operand(type="SIMDReg", size=128, dest="VEX"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
Operand(type="SIMDReg", size=128, dest="VEXImmSrc")])
+add_group("avx2_sse4xmm0",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=256, dest="VEXImmSrc")])
-add_insn("vpblendvb", "avx_sse4xmm0_128", modifiers=[0x4C])
+add_insn("vpblendvb", "avx2_sse4xmm0", modifiers=[0x4C])
for sfx, sz in zip("bwl", [8, 16, 32]):
add_group("crc32",
@@ -5915,6 +6089,13 @@ add_group("movntdqa",
opcode=[0x0F, 0x38, 0x2A],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="Mem", size=128, relaxed=True, dest="EA")])
+add_group("movntdqa",
+ cpu=["AVX2"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x2A],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="Mem", size=256, relaxed=True, dest="EA")])
add_insn("movntdqa", "movntdqa")
add_insn("vmovntdqa", "movntdqa", modifiers=[VEXL0], avx=True)
@@ -6089,6 +6270,22 @@ for sz in [16, 32, 64]:
opcode=[0x0F, 0x38, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="EA")])
+ add_group("sse4m%d" % sz,
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="Mem", size=sz*2, relaxed=True, dest="EA")])
+ add_group("sse4m%d" % sz,
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
add_insn("pmovsxbw", "sse4m64", modifiers=[0x20])
add_insn("pmovsxwd", "sse4m64", modifiers=[0x23])
@@ -6365,12 +6562,22 @@ add_group("avx_ssse3_2op",
opcode=[0x0F, 0x38, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-
-add_insn("vpabsb", "avx_ssse3_2op", modifiers=[0x1C], avx=True)
-add_insn("vpabsw", "avx_ssse3_2op", modifiers=[0x1D], avx=True)
-add_insn("vpabsd", "avx_ssse3_2op", modifiers=[0x1E], avx=True)
add_insn("vphminposuw", "avx_ssse3_2op", modifiers=[0x41], avx=True)
+# VPABS* are extended to 256-bit in AVX2
+for cpu, sz in zip(["AVX", "AVX2"], [128, 256]):
+ add_group("avx2_ssse3_2op",
+ cpu=[cpu],
+ modifiers=["Op2Add"],
+ vex=sz,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+add_insn("vpabsb", "avx2_ssse3_2op", modifiers=[0x1C], avx=True)
+add_insn("vpabsw", "avx2_ssse3_2op", modifiers=[0x1D], avx=True)
+add_insn("vpabsd", "avx2_ssse3_2op", modifiers=[0x1E], avx=True)
+
# Some conversion functions take xmm, ymm combination
# Need separate x and y versions for gas mode
add_group("avx_cvt_xmm128_x",
@@ -6437,6 +6644,20 @@ add_group("vbroadcastss",
opcode=[0x0F, 0x38, 0x18],
operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+add_group("vbroadcastss",
+ cpu=["AVX2"],
+ vex=128,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x18],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("vbroadcastss",
+ cpu=["AVX2"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x18],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
add_insn("vbroadcastss", "vbroadcastss")
@@ -6447,41 +6668,51 @@ add_group("vbroadcastsd",
opcode=[0x0F, 0x38, 0x19],
operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_group("vbroadcastsd",
+ cpu=["AVX2"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x19],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
add_insn("vbroadcastsd", "vbroadcastsd")
-add_group("vbroadcastf128",
- cpu=["AVX"],
+add_group("vbroadcastif128",
+ modifiers=["Op2Add"],
vex=256,
prefix=0x66,
- opcode=[0x0F, 0x38, 0x1A],
+ opcode=[0x0F, 0x38, 0x00],
operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="Mem", size=128, relaxed=True, dest="EA")])
-add_insn("vbroadcastf128", "vbroadcastf128")
+add_insn("vbroadcastf128", "vbroadcastif128", modifiers=[0x1A], cpu=["AVX"])
+add_insn("vbroadcasti128", "vbroadcastif128", modifiers=[0x5A], cpu=["AVX2"])
-add_group("vextractf128",
- cpu=["AVX"],
+add_group("vextractif128",
+ modifiers=["Op2Add"],
vex=256,
prefix=0x66,
- opcode=[0x0F, 0x3A, 0x19],
+ opcode=[0x0F, 0x3A, 0x00],
operands=[Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
-add_insn("vextractf128", "vextractf128")
+add_insn("vextractf128", "vextractif128", modifiers=[0x19], cpu=["AVX"])
+add_insn("vextracti128", "vextractif128", modifiers=[0x39], cpu=["AVX2"])
-add_group("vinsertf128",
- cpu=["AVX"],
+add_group("vinsertif128",
+ modifiers=["Op2Add"],
vex=256,
prefix=0x66,
- opcode=[0x0F, 0x3A, 0x18],
+ opcode=[0x0F, 0x3A, 0x00],
operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
Operand(type="SIMDReg", size=256, dest="VEX"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
-add_insn("vinsertf128", "vinsertf128")
+add_insn("vinsertf128", "vinsertif128", modifiers=[0x18], cpu=["AVX"])
+add_insn("vinserti128", "vinsertif128", modifiers=[0x38], cpu=["AVX2"])
add_group("vzero",
cpu=["AVX"],
@@ -6493,7 +6724,6 @@ add_insn("vzeroall", "vzero", modifiers=[VEXL1])
add_insn("vzeroupper", "vzero", modifiers=[VEXL0])
add_group("vmaskmov",
- cpu=["AVX"],
modifiers=["Op2Add"],
vex=128,
prefix=0x66,
@@ -6502,7 +6732,6 @@ add_group("vmaskmov",
Operand(type="SIMDReg", size=128, dest="VEX"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
add_group("vmaskmov",
- cpu=["AVX"],
modifiers=["Op2Add"],
vex=256,
prefix=0x66,
@@ -6511,7 +6740,6 @@ add_group("vmaskmov",
Operand(type="SIMDReg", size=256, dest="VEX"),
Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
add_group("vmaskmov",
- cpu=["AVX"],
modifiers=["Op2Add"],
vex=128,
prefix=0x66,
@@ -6520,7 +6748,6 @@ add_group("vmaskmov",
Operand(type="SIMDReg", size=128, dest="VEX"),
Operand(type="SIMDReg", size=128, dest="Spare")])
add_group("vmaskmov",
- cpu=["AVX"],
modifiers=["Op2Add"],
vex=256,
prefix=0x66,
@@ -6529,8 +6756,8 @@ add_group("vmaskmov",
Operand(type="SIMDReg", size=256, dest="VEX"),
Operand(type="SIMDReg", size=256, dest="Spare")])
-add_insn("vmaskmovps", "vmaskmov", modifiers=[0x2C])
-add_insn("vmaskmovpd", "vmaskmov", modifiers=[0x2D])
+add_insn("vmaskmovps", "vmaskmov", modifiers=[0x2C], cpu=["AVX"])
+add_insn("vmaskmovpd", "vmaskmov", modifiers=[0x2D], cpu=["AVX"])
add_group("vpermil",
cpu=["AVX"],
@@ -6585,6 +6812,222 @@ add_group("vperm2f128",
add_insn("vperm2f128", "vperm2f128")
#####################################################################
+# Intel AVX2 instructions
+#####################################################################
+
+# Most AVX2 instructions are mixed in with above SSEx/AVX groups.
+# Some make more sense to have separate groups.
+
+# vex.vvvv=1111b
+add_group("vperm_var_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+
+add_insn("vpermd", "vperm_var_avx2", modifiers=[0x36])
+add_insn("vpermps", "vperm_var_avx2", modifiers=[0x16])
+
+# vex.vvvv=1111b
+add_group("vperm_imm_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("vpermq", "vperm_imm_avx2", modifiers=[0x00])
+add_insn("vpermpd", "vperm_imm_avx2", modifiers=[0x01])
+
+add_group("vperm2i128_avx2",
+ cpu=["AVX2"],
+ vex=256,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x46],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="SIMDReg", size=256, dest="VEX"),
+ Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("vperm2i128", "vperm2i128_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastb_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x78],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastb_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x78],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="RM", size=8, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastb", "vpbroadcastb_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastw_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x79],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastw_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x79],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="RM", size=16, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastw", "vpbroadcastw_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastd_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x58],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastd_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x58],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="RM", size=32, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastd", "vpbroadcastd_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastq_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x59],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vpbroadcastq_avx2",
+ cpu=["AVX2"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x59],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="RM", size=64, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastq", "vpbroadcastq_avx2")
+
+for sz in [128, 256]:
+ add_group("vpshiftv_vexw0_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+
+for sz in [128, 256]:
+ add_group("vpshiftv_vexw1_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=sz,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+
+add_insn("vpsrlvd", "vpshiftv_vexw0_avx2", modifiers=[0x45])
+add_insn("vpsrlvq", "vpshiftv_vexw1_avx2", modifiers=[0x45])
+add_insn("vpsravd", "vpshiftv_vexw0_avx2", modifiers=[0x46])
+
+add_insn("vpsllvd", "vpshiftv_vexw0_avx2", modifiers=[0x47])
+add_insn("vpsllvq", "vpshiftv_vexw1_avx2", modifiers=[0x47])
+
+add_insn("vpmaskmovd", "vmaskmov", modifiers=[0x8C], cpu=["AVX2"])
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+ add_group("vmaskmov_vexw1_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=sz,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+
+for sz in [128, 256]:
+ add_group("vmaskmov_vexw1_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=sz,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x02],
+ operands=[Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDReg", size=sz, dest="Spare")])
+
+add_insn("vpmaskmovq", "vmaskmov_vexw1_avx2", modifiers=[0x8C])
+
+for sz in [128, 256]:
+ add_group("vex_66_0F3A_imm8_avx2",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=sz,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+ Operand(type="SIMDReg", size=sz, dest="VEX"),
+ Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("vpblendd", "vex_66_0F3A_imm8_avx2", modifiers=[0x02])
+
+#####################################################################
# Intel FMA instructions
#####################################################################
diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc
index fab543b3..7aebd992 100644
--- a/modules/arch/x86/tests/Makefile.inc
+++ b/modules/arch/x86/tests/Makefile.inc
@@ -25,6 +25,8 @@ EXTRA_DIST += modules/arch/x86/tests/avx.asm
EXTRA_DIST += modules/arch/x86/tests/avx.hex
EXTRA_DIST += modules/arch/x86/tests/avx16.asm
EXTRA_DIST += modules/arch/x86/tests/avx16.hex
+EXTRA_DIST += modules/arch/x86/tests/avx2.asm
+EXTRA_DIST += modules/arch/x86/tests/avx2.hex
EXTRA_DIST += modules/arch/x86/tests/avxcc.asm
EXTRA_DIST += modules/arch/x86/tests/avxcc.hex
EXTRA_DIST += modules/arch/x86/tests/bittest.asm
diff --git a/modules/arch/x86/tests/avx2.asm b/modules/arch/x86/tests/avx2.asm
new file mode 100644
index 00000000..acf17ade
--- /dev/null
+++ b/modules/arch/x86/tests/avx2.asm
@@ -0,0 +1,612 @@
+; Exhaustive test of AVX2 instructions
+;
+; Copyright (C) 2011 Peter Johnson
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; 1. Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; 2. Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+; POSSIBILITY OF SUCH DAMAGE.
+;
+
+[bits 64]
+
+vmpsadbw ymm1, ymm3, 3 ; c4 e3 75 42 cb 03
+vmpsadbw ymm1, yword [rax], 3 ; c4 e3 75 42 08 03
+vmpsadbw ymm1, ymm2, ymm3, 3 ; c4 e3 6d 42 cb 03
+vmpsadbw ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 42 08 03
+
+vpabsb ymm1, ymm2 ; c4 e2 7d 1c ca
+vpabsb ymm1, yword [rax] ; c4 e2 7d 1c 08
+
+vpabsw ymm1, ymm2 ; c4 e2 7d 1d ca
+vpabsw ymm1, yword [rax] ; c4 e2 7d 1d 08
+
+vpabsd ymm1, ymm2 ; c4 e2 7d 1e ca
+vpabsd ymm1, yword [rax] ; c4 e2 7d 1e 08
+
+vpacksswb ymm1, ymm3 ; c5 f5 63 cb
+vpacksswb ymm1, yword [rax] ; c5 f5 63 08
+vpacksswb ymm1, ymm2, ymm3 ; c5 ed 63 cb
+vpacksswb ymm1, ymm2, yword [rax] ; c5 ed 63 08
+
+vpackssdw ymm1, ymm3 ; c5 f5 6b cb
+vpackssdw ymm1, yword [rax] ; c5 f5 6b 08
+vpackssdw ymm1, ymm2, ymm3 ; c5 ed 6b cb
+vpackssdw ymm1, ymm2, yword [rax] ; c5 ed 6b 08
+
+vpackusdw ymm1, ymm3 ; c4 e2 75 2b cb
+vpackusdw ymm1, yword [rax] ; c4 e2 75 2b 08
+vpackusdw ymm1, ymm2, ymm3 ; c4 e2 6d 2b cb
+vpackusdw ymm1, ymm2, yword [rax] ; c4 e2 6d 2b 08
+
+vpackuswb ymm1, ymm3 ; c5 f5 67 cb
+vpackuswb ymm1, yword [rax] ; c5 f5 67 08
+vpackuswb ymm1, ymm2, ymm3 ; c5 ed 67 cb
+vpackuswb ymm1, ymm2, yword [rax] ; c5 ed 67 08
+
+vpaddb ymm1, ymm3 ; c5 f5 fc cb
+vpaddb ymm1, yword [rax] ; c5 f5 fc 08
+vpaddb ymm1, ymm2, ymm3 ; c5 ed fc cb
+vpaddb ymm1, ymm2, yword [rax] ; c5 ed fc 08
+
+vpaddw ymm1, ymm3 ; c5 f5 fd cb
+vpaddw ymm1, yword [rax] ; c5 f5 fd 08
+vpaddw ymm1, ymm2, ymm3 ; c5 ed fd cb
+vpaddw ymm1, ymm2, yword [rax] ; c5 ed fd 08
+
+vpaddd ymm1, ymm3 ; c5 f5 fe cb
+vpaddd ymm1, yword [rax] ; c5 f5 fe 08
+vpaddd ymm1, ymm2, ymm3 ; c5 ed fe cb
+vpaddd ymm1, ymm2, yword [rax] ; c5 ed fe 08
+
+vpaddq ymm1, ymm3 ; c5 f5 d4 cb
+vpaddq ymm1, yword [rax] ; c5 f5 d4 08
+vpaddq ymm1, ymm2, ymm3 ; c5 ed d4 cb
+vpaddq ymm1, ymm2, yword [rax] ; c5 ed d4 08
+
+vpaddsb ymm1, ymm3 ; c5 f5 ec cb
+vpaddsb ymm1, yword [rax] ; c5 f5 ec 08
+vpaddsb ymm1, ymm2, ymm3 ; c5 ed ec cb
+vpaddsb ymm1, ymm2, yword [rax] ; c5 ed ec 08
+
+vpaddsw ymm1, ymm3 ; c5 f5 ed cb
+vpaddsw ymm1, yword [rax] ; c5 f5 ed 08
+vpaddsw ymm1, ymm2, ymm3 ; c5 ed ed cb
+vpaddsw ymm1, ymm2, yword [rax] ; c5 ed ed 08
+
+vpaddusb ymm1, ymm3 ; c5 f5 dc cb
+vpaddusb ymm1, yword [rax] ; c5 f5 dc 08
+vpaddusb ymm1, ymm2, ymm3 ; c5 ed dc cb
+vpaddusb ymm1, ymm2, yword [rax] ; c5 ed dc 08
+
+vpaddusw ymm1, ymm3 ; c5 f5 dd cb
+vpaddusw ymm1, yword [rax] ; c5 f5 dd 08
+vpaddusw ymm1, ymm2, ymm3 ; c5 ed dd cb
+vpaddusw ymm1, ymm2, yword [rax] ; c5 ed dd 08
+
+vpalignr ymm1, ymm2, ymm3, 3 ; c4 e3 6d 0f cb 03
+vpalignr ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 0f 08 03
+
+vpand ymm1, ymm3 ; c5 f5 db cb
+vpand ymm1, yword [rax] ; c5 f5 db 08
+vpand ymm1, ymm2, ymm3 ; c5 ed db cb
+vpand ymm1, ymm2, yword [rax] ; c5 ed db 08
+
+vpandn ymm1, ymm3 ; c5 f5 df cb
+vpandn ymm1, yword [rax] ; c5 f5 df 08
+vpandn ymm1, ymm2, ymm3 ; c5 ed df cb
+vpandn ymm1, ymm2, yword [rax] ; c5 ed df 08
+
+vpavgb ymm1, ymm3 ; c5 f5 e0 cb
+vpavgb ymm1, yword [rax] ; c5 f5 e0 08
+vpavgb ymm1, ymm2, ymm3 ; c5 ed e0 cb
+vpavgb ymm1, ymm2, yword [rax] ; c5 ed e0 08
+
+vpavgw ymm1, ymm3 ; c5 f5 e3 cb
+vpavgw ymm1, yword [rax] ; c5 f5 e3 08
+vpavgw ymm1, ymm2, ymm3 ; c5 ed e3 cb
+vpavgw ymm1, ymm2, yword [rax] ; c5 ed e3 08
+
+vpblendvb ymm1, ymm2, ymm3, ymm4 ; c4 e3 6d 4c cb 40
+vpblendvb ymm1, ymm2, yword [rax], ymm4 ; c4 e3 6d 4c 08 40
+
+vpblendw ymm1, ymm3, 3 ; c4 e3 75 0e cb 03
+vpblendw ymm1, yword [rax], 3 ; c4 e3 75 0e 08 03
+vpblendw ymm1, ymm2, ymm3, 3 ; c4 e3 6d 0e cb 03
+vpblendw ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 0e 08 03
+
+vpcmpeqb ymm1, ymm3 ; c5 f5 74 cb
+vpcmpeqb ymm1, yword [rax] ; c5 f5 74 08
+vpcmpeqb ymm1, ymm2, ymm3 ; c5 ed 74 cb
+vpcmpeqb ymm1, ymm2, yword [rax] ; c5 ed 74 08
+
+vpcmpeqw ymm1, ymm3 ; c5 f5 75 cb
+vpcmpeqw ymm1, yword [rax] ; c5 f5 75 08
+vpcmpeqw ymm1, ymm2, ymm3 ; c5 ed 75 cb
+vpcmpeqw ymm1, ymm2, yword [rax] ; c5 ed 75 08
+
+vpcmpeqd ymm1, ymm3 ; c5 f5 76 cb
+vpcmpeqd ymm1, yword [rax] ; c5 f5 76 08
+vpcmpeqd ymm1, ymm2, ymm3 ; c5 ed 76 cb
+vpcmpeqd ymm1, ymm2, yword [rax] ; c5 ed 76 08
+
+vpcmpeqq ymm1, ymm3 ; c4 e2 75 29 cb
+vpcmpeqq ymm1, yword [rax] ; c4 e2 75 29 08
+vpcmpeqq ymm1, ymm2, ymm3 ; c4 e2 6d 29 cb
+vpcmpeqq ymm1, ymm2, yword [rax] ; c4 e2 6d 29 08
+
+vpcmpgtb ymm1, ymm3 ; c5 f5 64 cb
+vpcmpgtb ymm1, yword [rax] ; c5 f5 64 08
+vpcmpgtb ymm1, ymm2, ymm3 ; c5 ed 64 cb
+vpcmpgtb ymm1, ymm2, yword [rax] ; c5 ed 64 08
+
+vpcmpgtw ymm1, ymm3 ; c5 f5 65 cb
+vpcmpgtw ymm1, yword [rax] ; c5 f5 65 08
+vpcmpgtw ymm1, ymm2, ymm3 ; c5 ed 65 cb
+vpcmpgtw ymm1, ymm2, yword [rax] ; c5 ed 65 08
+
+vpcmpgtd ymm1, ymm3 ; c5 f5 66 cb
+vpcmpgtd ymm1, yword [rax] ; c5 f5 66 08
+vpcmpgtd ymm1, ymm2, ymm3 ; c5 ed 66 cb
+vpcmpgtd ymm1, ymm2, yword [rax] ; c5 ed 66 08
+
+vpcmpgtq ymm1, ymm3 ; c4 e2 75 37 cb
+vpcmpgtq ymm1, yword [rax] ; c4 e2 75 37 08
+vpcmpgtq ymm1, ymm2, ymm3 ; c4 e2 6d 37 cb
+vpcmpgtq ymm1, ymm2, yword [rax] ; c4 e2 6d 37 08
+
+vphaddw ymm1, ymm3 ; c4 e2 75 01 cb
+vphaddw ymm1, yword [rax] ; c4 e2 75 01 08
+vphaddw ymm1, ymm2, ymm3 ; c4 e2 6d 01 cb
+vphaddw ymm1, ymm2, yword [rax] ; c4 e2 6d 01 08
+
+vphaddd ymm1, ymm3 ; c4 e2 75 02 cb
+vphaddd ymm1, yword [rax] ; c4 e2 75 02 08
+vphaddd ymm1, ymm2, ymm3 ; c4 e2 6d 02 cb
+vphaddd ymm1, ymm2, yword [rax] ; c4 e2 6d 02 08
+
+vphaddsw ymm1, ymm3 ; c4 e2 75 03 cb
+vphaddsw ymm1, yword [rax] ; c4 e2 75 03 08
+vphaddsw ymm1, ymm2, ymm3 ; c4 e2 6d 03 cb
+vphaddsw ymm1, ymm2, yword [rax] ; c4 e2 6d 03 08
+
+vphsubw ymm1, ymm3 ; c4 e2 75 05 cb
+vphsubw ymm1, yword [rax] ; c4 e2 75 05 08
+vphsubw ymm1, ymm2, ymm3 ; c4 e2 6d 05 cb
+vphsubw ymm1, ymm2, yword [rax] ; c4 e2 6d 05 08
+
+vphsubd ymm1, ymm3 ; c4 e2 75 06 cb
+vphsubd ymm1, yword [rax] ; c4 e2 75 06 08
+vphsubd ymm1, ymm2, ymm3 ; c4 e2 6d 06 cb
+vphsubd ymm1, ymm2, yword [rax] ; c4 e2 6d 06 08
+
+vphsubsw ymm1, ymm3 ; c4 e2 75 07 cb
+vphsubsw ymm1, yword [rax] ; c4 e2 75 07 08
+vphsubsw ymm1, ymm2, ymm3 ; c4 e2 6d 07 cb
+vphsubsw ymm1, ymm2, yword [rax] ; c4 e2 6d 07 08
+
+vpmaddubsw ymm1, ymm3 ; c4 e2 75 04 cb
+vpmaddubsw ymm1, yword [rax] ; c4 e2 75 04 08
+vpmaddubsw ymm1, ymm2, ymm3 ; c4 e2 6d 04 cb
+vpmaddubsw ymm1, ymm2, yword [rax] ; c4 e2 6d 04 08
+
+vpmaddwd ymm1, ymm3 ; c5 f5 f5 cb
+vpmaddwd ymm1, yword [rax] ; c5 f5 f5 08
+vpmaddwd ymm1, ymm2, ymm3 ; c5 ed f5 cb
+vpmaddwd ymm1, ymm2, yword [rax] ; c5 ed f5 08
+
+vpmaxsb ymm1, ymm3 ; c4 e2 75 3c cb
+vpmaxsb ymm1, yword [rax] ; c4 e2 75 3c 08
+vpmaxsb ymm1, ymm2, ymm3 ; c4 e2 6d 3c cb
+vpmaxsb ymm1, ymm2, yword [rax] ; c4 e2 6d 3c 08
+
+vpmaxsw ymm1, ymm3 ; c5 f5 ee cb
+vpmaxsw ymm1, yword [rax] ; c5 f5 ee 08
+vpmaxsw ymm1, ymm2, ymm3 ; c5 ed ee cb
+vpmaxsw ymm1, ymm2, yword [rax] ; c5 ed ee 08
+
+vpmaxsd ymm1, ymm3 ; c4 e2 75 3d cb
+vpmaxsd ymm1, yword [rax] ; c4 e2 75 3d 08
+vpmaxsd ymm1, ymm2, ymm3 ; c4 e2 6d 3d cb
+vpmaxsd ymm1, ymm2, yword [rax] ; c4 e2 6d 3d 08
+
+vpmaxub ymm1, ymm3 ; c5 f5 de cb
+vpmaxub ymm1, yword [rax] ; c5 f5 de 08
+vpmaxub ymm1, ymm2, ymm3 ; c5 ed de cb
+vpmaxub ymm1, ymm2, yword [rax] ; c5 ed de 08
+
+vpmaxuw ymm1, ymm3 ; c4 e2 75 3e cb
+vpmaxuw ymm1, yword [rax] ; c4 e2 75 3e 08
+vpmaxuw ymm1, ymm2, ymm3 ; c4 e2 6d 3e cb
+vpmaxuw ymm1, ymm2, yword [rax] ; c4 e2 6d 3e 08
+
+vpmaxud ymm1, ymm3 ; c4 e2 75 3f cb
+vpmaxud ymm1, yword [rax] ; c4 e2 75 3f 08
+vpmaxud ymm1, ymm2, ymm3 ; c4 e2 6d 3f cb
+vpmaxud ymm1, ymm2, yword [rax] ; c4 e2 6d 3f 08
+
+vpminsb ymm1, ymm3 ; c4 e2 75 38 cb
+vpminsb ymm1, yword [rax] ; c4 e2 75 38 08
+vpminsb ymm1, ymm2, ymm3 ; c4 e2 6d 38 cb
+vpminsb ymm1, ymm2, yword [rax] ; c4 e2 6d 38 08
+
+vpminsw ymm1, ymm3 ; c5 f5 ea cb
+vpminsw ymm1, yword [rax] ; c5 f5 ea 08
+vpminsw ymm1, ymm2, ymm3 ; c5 ed ea cb
+vpminsw ymm1, ymm2, yword [rax] ; c5 ed ea 08
+
+vpminsd ymm1, ymm3 ; c4 e2 75 39 cb
+vpminsd ymm1, yword [rax] ; c4 e2 75 39 08
+vpminsd ymm1, ymm2, ymm3 ; c4 e2 6d 39 cb
+vpminsd ymm1, ymm2, yword [rax] ; c4 e2 6d 39 08
+
+vpminub ymm1, ymm3 ; c5 f5 da cb
+vpminub ymm1, yword [rax] ; c5 f5 da 08
+vpminub ymm1, ymm2, ymm3 ; c5 ed da cb
+vpminub ymm1, ymm2, yword [rax] ; c5 ed da 08
+
+vpminuw ymm1, ymm3 ; c4 e2 75 3a cb
+vpminuw ymm1, yword [rax] ; c4 e2 75 3a 08
+vpminuw ymm1, ymm2, ymm3 ; c4 e2 6d 3a cb
+vpminuw ymm1, ymm2, yword [rax] ; c4 e2 6d 3a 08
+
+vpminud ymm1, ymm3 ; c4 e2 75 3b cb
+vpminud ymm1, yword [rax] ; c4 e2 75 3b 08
+vpminud ymm1, ymm2, ymm3 ; c4 e2 6d 3b cb
+vpminud ymm1, ymm2, yword [rax] ; c4 e2 6d 3b 08
+
+vpmovmskb eax, ymm1 ; c5 fd d7 c1
+vpmovmskb rax, ymm1 ; c5 fd d7 c1
+
+vpmovsxbw ymm1, xmm2 ; c4 e2 7d 20 ca
+vpmovsxbw ymm1, [rax] ; c4 e2 7d 20 08
+vpmovsxbw ymm1, oword [rax] ; c4 e2 7d 20 08
+
+vpmovsxbd ymm1, xmm2 ; c4 e2 7d 21 ca
+vpmovsxbd ymm1, [rax] ; c4 e2 7d 21 08
+vpmovsxbd ymm1, qword [rax] ; c4 e2 7d 21 08
+
+vpmovsxbq ymm1, xmm2 ; c4 e2 7d 22 ca
+vpmovsxbq ymm1, [rax] ; c4 e2 7d 22 08
+vpmovsxbq ymm1, dword [rax] ; c4 e2 7d 22 08
+
+vpmovsxwd ymm1, xmm2 ; c4 e2 7d 23 ca
+vpmovsxwd ymm1, [rax] ; c4 e2 7d 23 08
+vpmovsxwd ymm1, oword [rax] ; c4 e2 7d 23 08
+
+vpmovsxwq ymm1, xmm2 ; c4 e2 7d 24 ca
+vpmovsxwq ymm1, [rax] ; c4 e2 7d 24 08
+vpmovsxwq ymm1, qword [rax] ; c4 e2 7d 24 08
+
+vpmovsxdq ymm1, xmm2 ; c4 e2 7d 25 ca
+vpmovsxdq ymm1, [rax] ; c4 e2 7d 25 08
+vpmovsxdq ymm1, oword [rax] ; c4 e2 7d 25 08
+
+vpmovzxbw ymm1, xmm2 ; c4 e2 7d 30 ca
+vpmovzxbw ymm1, [rax] ; c4 e2 7d 30 08
+vpmovzxbw ymm1, oword [rax] ; c4 e2 7d 30 08
+
+vpmovzxbd ymm1, xmm2 ; c4 e2 7d 31 ca
+vpmovzxbd ymm1, [rax] ; c4 e2 7d 31 08
+vpmovzxbd ymm1, qword [rax] ; c4 e2 7d 31 08
+
+vpmovzxbq ymm1, xmm2 ; c4 e2 7d 32 ca
+vpmovzxbq ymm1, [rax] ; c4 e2 7d 32 08
+vpmovzxbq ymm1, dword [rax] ; c4 e2 7d 32 08
+
+vpmovzxwd ymm1, xmm2 ; c4 e2 7d 33 ca
+vpmovzxwd ymm1, [rax] ; c4 e2 7d 33 08
+vpmovzxwd ymm1, oword [rax] ; c4 e2 7d 33 08
+
+vpmovzxwq ymm1, xmm2 ; c4 e2 7d 34 ca
+vpmovzxwq ymm1, [rax] ; c4 e2 7d 34 08
+vpmovzxwq ymm1, qword [rax] ; c4 e2 7d 34 08
+
+vpmovzxdq ymm1, xmm2 ; c4 e2 7d 35 ca
+vpmovzxdq ymm1, [rax] ; c4 e2 7d 35 08
+vpmovzxdq ymm1, oword [rax] ; c4 e2 7d 35 08
+
+vpmuldq ymm1, ymm3 ; c4 e2 75 28 cb
+vpmuldq ymm1, yword [rax] ; c4 e2 75 28 08
+vpmuldq ymm1, ymm2, ymm3 ; c4 e2 6d 28 cb
+vpmuldq ymm1, ymm2, yword [rax] ; c4 e2 6d 28 08
+
+vpmulhrsw ymm1, ymm3 ; c4 e2 75 0b cb
+vpmulhrsw ymm1, yword [rax] ; c4 e2 75 0b 08
+vpmulhrsw ymm1, ymm2, ymm3 ; c4 e2 6d 0b cb
+vpmulhrsw ymm1, ymm2, yword [rax] ; c4 e2 6d 0b 08
+
+vpmulhuw ymm1, ymm3 ; c5 f5 e4 cb
+vpmulhuw ymm1, yword [rax] ; c5 f5 e4 08
+vpmulhuw ymm1, ymm2, ymm3 ; c5 ed e4 cb
+vpmulhuw ymm1, ymm2, yword [rax] ; c5 ed e4 08
+
+vpmulhw ymm1, ymm3 ; c5 f5 e5 cb
+vpmulhw ymm1, yword [rax] ; c5 f5 e5 08
+vpmulhw ymm1, ymm2, ymm3 ; c5 ed e5 cb
+vpmulhw ymm1, ymm2, yword [rax] ; c5 ed e5 08
+
+vpmullw ymm1, ymm3 ; c5 f5 d5 cb
+vpmullw ymm1, yword [rax] ; c5 f5 d5 08
+vpmullw ymm1, ymm2, ymm3 ; c5 ed d5 cb
+vpmullw ymm1, ymm2, yword [rax] ; c5 ed d5 08
+
+vpmulld ymm1, ymm3 ; c4 e2 75 40 cb
+vpmulld ymm1, yword [rax] ; c4 e2 75 40 08
+vpmulld ymm1, ymm2, ymm3 ; c4 e2 6d 40 cb
+vpmulld ymm1, ymm2, yword [rax] ; c4 e2 6d 40 08
+
+vpmuludq ymm1, ymm3 ; c5 f5 f4 cb
+vpmuludq ymm1, yword [rax] ; c5 f5 f4 08
+vpmuludq ymm1, ymm2, ymm3 ; c5 ed f4 cb
+vpmuludq ymm1, ymm2, yword [rax] ; c5 ed f4 08
+
+vpor ymm1, ymm3 ; c5 f5 eb cb
+vpor ymm1, yword [rax] ; c5 f5 eb 08
+vpor ymm1, ymm2, ymm3 ; c5 ed eb cb
+vpor ymm1, ymm2, yword [rax] ; c5 ed eb 08
+
+vpsadbw ymm1, ymm3 ; c5 f5 f6 cb
+vpsadbw ymm1, yword [rax] ; c5 f5 f6 08
+vpsadbw ymm1, ymm2, ymm3 ; c5 ed f6 cb
+vpsadbw ymm1, ymm2, yword [rax] ; c5 ed f6 08
+
+vpshufb ymm1, ymm3 ; c4 e2 75 00 cb
+vpshufb ymm1, yword [rax] ; c4 e2 75 00 08
+vpshufb ymm1, ymm2, ymm3 ; c4 e2 6d 00 cb
+vpshufb ymm1, ymm2, yword [rax] ; c4 e2 6d 00 08
+
+vpshufd ymm1, ymm3, 3 ; c5 fd 70 cb 03
+vpshufd ymm1, yword [rax], 3 ; c5 fd 70 08 03
+
+vpshufhw ymm1, ymm3, 3 ; c5 fe 70 cb 03
+vpshufhw ymm1, yword [rax], 3 ; c5 fe 70 08 03
+
+vpshuflw ymm1, ymm3, 3 ; c5 ff 70 cb 03
+vpshuflw ymm1, yword [rax], 3 ; c5 ff 70 08 03
+
+vpsignb ymm1, ymm3 ; c4 e2 75 08 cb
+vpsignb ymm1, yword [rax] ; c4 e2 75 08 08
+vpsignb ymm1, ymm2, ymm3 ; c4 e2 6d 08 cb
+vpsignb ymm1, ymm2, yword [rax] ; c4 e2 6d 08 08
+
+vpsignw ymm1, ymm3 ; c4 e2 75 09 cb
+vpsignw ymm1, yword [rax] ; c4 e2 75 09 08
+vpsignw ymm1, ymm2, ymm3 ; c4 e2 6d 09 cb
+vpsignw ymm1, ymm2, yword [rax] ; c4 e2 6d 09 08
+
+vpsignd ymm1, ymm3 ; c4 e2 75 0a cb
+vpsignd ymm1, yword [rax] ; c4 e2 75 0a 08
+vpsignd ymm1, ymm2, ymm3 ; c4 e2 6d 0a cb
+vpsignd ymm1, ymm2, yword [rax] ; c4 e2 6d 0a 08
+
+vpslldq ymm1, 3 ; c5 f5 73 f9 03
+vpslldq ymm1, ymm2, 3 ; c5 f5 73 fa 03
+
+vpsllw ymm1, xmm3 ; c5 f5 f1 cb
+vpsllw ymm1, oword [rax] ; c5 f5 f1 08
+vpsllw ymm1, 3 ; c5 f5 71 f1 03
+vpsllw ymm1, ymm2, xmm3 ; c5 ed f1 cb
+vpsllw ymm1, ymm2, oword [rax] ; c5 ed f1 08
+vpsllw ymm1, ymm2, 3 ; c5 f5 71 f2 03
+
+vpslld ymm1, xmm3 ; c5 f5 f2 cb
+vpslld ymm1, oword [rax] ; c5 f5 f2 08
+vpslld ymm1, 3 ; c5 f5 72 f1 03
+vpslld ymm1, ymm2, xmm3 ; c5 ed f2 cb
+vpslld ymm1, ymm2, oword [rax] ; c5 ed f2 08
+vpslld ymm1, ymm2, 3 ; c5 f5 72 f2 03
+
+vpsllq ymm1, xmm3 ; c5 f5 f3 cb
+vpsllq ymm1, oword [rax] ; c5 f5 f3 08
+vpsllq ymm1, 3 ; c5 f5 73 f1 03
+vpsllq ymm1, ymm2, xmm3 ; c5 ed f3 cb
+vpsllq ymm1, ymm2, oword [rax] ; c5 ed f3 08
+vpsllq ymm1, ymm2, 3 ; c5 f5 73 f2 03
+
+vpsraw ymm1, xmm3 ; c5 f5 e1 cb
+vpsraw ymm1, oword [rax] ; c5 f5 e1 08
+vpsraw ymm1, 3 ; c5 f5 71 e1 03
+vpsraw ymm1, ymm2, xmm3 ; c5 ed e1 cb
+vpsraw ymm1, ymm2, oword [rax] ; c5 ed e1 08
+vpsraw ymm1, ymm2, 3 ; c5 f5 71 e2 03
+
+vpsrad ymm1, xmm3 ; c5 f5 e2 cb
+vpsrad ymm1, oword [rax] ; c5 f5 e2 08
+vpsrad ymm1, 3 ; c5 f5 72 e1 03
+vpsrad ymm1, ymm2, xmm3 ; c5 ed e2 cb
+vpsrad ymm1, ymm2, oword [rax] ; c5 ed e2 08
+vpsrad ymm1, ymm2, 3 ; c5 f5 72 e2 03
+
+vpsrldq ymm1, 3 ; c5 f5 73 d9 03
+vpsrldq ymm1, ymm2, 3 ; c5 f5 73 da 03
+
+vpsrlw ymm1, xmm3 ; c5 f5 d1 cb
+vpsrlw ymm1, oword [rax] ; c5 f5 d1 08
+vpsrlw ymm1, 3 ; c5 f5 71 d1 03
+vpsrlw ymm1, ymm2, xmm3 ; c5 ed d1 cb
+vpsrlw ymm1, ymm2, oword [rax] ; c5 ed d1 08
+vpsrlw ymm1, ymm2, 3 ; c5 f5 71 d2 03
+
+vpsrld ymm1, xmm3 ; c5 f5 d2 cb
+vpsrld ymm1, oword [rax] ; c5 f5 d2 08
+vpsrld ymm1, 3 ; c5 f5 72 d1 03
+vpsrld ymm1, ymm2, xmm3 ; c5 ed d2 cb
+vpsrld ymm1, ymm2, oword [rax] ; c5 ed d2 08
+vpsrld ymm1, ymm2, 3 ; c5 f5 72 d2 03
+
+vpsrld ymm1, xmm3 ; c5 f5 d2 cb
+vpsrld ymm1, oword [rax] ; c5 f5 d2 08
+vpsrld ymm1, 3 ; c5 f5 72 d1 03
+vpsrld ymm1, ymm2, xmm3 ; c5 ed d2 cb
+vpsrld ymm1, ymm2, oword [rax] ; c5 ed d2 08
+vpsrld ymm1, ymm2, 3 ; c5 f5 72 d2 03
+
+vpsubsb ymm1, ymm3 ; c5 f5 e8 cb
+vpsubsb ymm1, yword [rax] ; c5 f5 e8 08
+vpsubsb ymm1, ymm2, ymm3 ; c5 ed e8 cb
+vpsubsb ymm1, ymm2, yword [rax] ; c5 ed e8 08
+
+vpsubsw ymm1, ymm3 ; c5 f5 e9 cb
+vpsubsw ymm1, yword [rax] ; c5 f5 e9 08
+vpsubsw ymm1, ymm2, ymm3 ; c5 ed e9 cb
+vpsubsw ymm1, ymm2, yword [rax] ; c5 ed e9 08
+
+vpsubusb ymm1, ymm3 ; c5 f5 d8 cb
+vpsubusb ymm1, yword [rax] ; c5 f5 d8 08
+vpsubusb ymm1, ymm2, ymm3 ; c5 ed d8 cb
+vpsubusb ymm1, ymm2, yword [rax] ; c5 ed d8 08
+
+vpsubusw ymm1, ymm3 ; c5 f5 d9 cb
+vpsubusw ymm1, yword [rax] ; c5 f5 d9 08
+vpsubusw ymm1, ymm2, ymm3 ; c5 ed d9 cb
+vpsubusw ymm1, ymm2, yword [rax] ; c5 ed d9 08
+
+vpunpckhbw ymm1, ymm3 ; c5 f5 68 cb
+vpunpckhbw ymm1, yword [rax] ; c5 f5 68 08
+vpunpckhbw ymm1, ymm2, ymm3 ; c5 ed 68 cb
+vpunpckhbw ymm1, ymm2, yword [rax] ; c5 ed 68 08
+
+vpunpckhwd ymm1, ymm3 ; c5 f5 69 cb
+vpunpckhwd ymm1, yword [rax] ; c5 f5 69 08
+vpunpckhwd ymm1, ymm2, ymm3 ; c5 ed 69 cb
+vpunpckhwd ymm1, ymm2, yword [rax] ; c5 ed 69 08
+
+vpunpckhdq ymm1, ymm3 ; c5 f5 6a cb
+vpunpckhdq ymm1, yword [rax] ; c5 f5 6a 08
+vpunpckhdq ymm1, ymm2, ymm3 ; c5 ed 6a cb
+vpunpckhdq ymm1, ymm2, yword [rax] ; c5 ed 6a 08
+
+vpunpckhqdq ymm1, ymm3 ; c5 f5 6d cb
+vpunpckhqdq ymm1, yword [rax] ; c5 f5 6d 08
+vpunpckhqdq ymm1, ymm2, ymm3 ; c5 ed 6d cb
+vpunpckhqdq ymm1, ymm2, yword [rax] ; c5 ed 6d 08
+
+vpunpcklbw ymm1, ymm3 ; c5 f5 60 cb
+vpunpcklbw ymm1, yword [rax] ; c5 f5 60 08
+vpunpcklbw ymm1, ymm2, ymm3 ; c5 ed 60 cb
+vpunpcklbw ymm1, ymm2, yword [rax] ; c5 ed 60 08
+
+vpunpcklwd ymm1, ymm3 ; c5 f5 61 cb
+vpunpcklwd ymm1, yword [rax] ; c5 f5 61 08
+vpunpcklwd ymm1, ymm2, ymm3 ; c5 ed 61 cb
+vpunpcklwd ymm1, ymm2, yword [rax] ; c5 ed 61 08
+
+vpunpckldq ymm1, ymm3 ; c5 f5 62 cb
+vpunpckldq ymm1, yword [rax] ; c5 f5 62 08
+vpunpckldq ymm1, ymm2, ymm3 ; c5 ed 62 cb
+vpunpckldq ymm1, ymm2, yword [rax] ; c5 ed 62 08
+
+vpunpcklqdq ymm1, ymm3 ; c5 f5 6c cb
+vpunpcklqdq ymm1, yword [rax] ; c5 f5 6c 08
+vpunpcklqdq ymm1, ymm2, ymm3 ; c5 ed 6c cb
+vpunpcklqdq ymm1, ymm2, yword [rax] ; c5 ed 6c 08
+
+vpxor ymm1, ymm3 ; c5 f5 ef cb
+vpxor ymm1, yword [rax] ; c5 f5 ef 08
+vpxor ymm1, ymm2, ymm3 ; c5 ed ef cb
+vpxor ymm1, ymm2, yword [rax] ; c5 ed ef 08
+
+vmovntdqa ymm1, yword [rax] ; c4 e2 7d 2a 08
+
+vbroadcastss xmm1, xmm2 ; c4 e2 79 18 ca
+vbroadcastss ymm1, xmm2 ; c4 e2 7d 18 ca
+
+vbroadcastsd ymm1, xmm2 ; c4 e2 7d 19 ca
+
+vbroadcasti128 ymm1, oword [rax] ; c4 e2 7d 5a 08
+
+vpblendd ymm1, ymm2, ymm3, 3 ; c4 e3 6d 02 cb 03
+vpblendd ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 02 08 03
+
+vpbroadcastb xmm1, xmm2 ; c4 e2 79 78 ca
+vpbroadcastb xmm1, byte [rax] ; c4 e2 79 78 08
+vpbroadcastb ymm1, xmm2 ; c4 e2 7d 78 ca
+vpbroadcastb ymm1, byte [rax] ; c4 e2 7d 78 08
+
+vpbroadcastw xmm1, xmm2 ; c4 e2 79 79 ca
+vpbroadcastw xmm1, word [rax] ; c4 e2 79 79 08
+vpbroadcastw ymm1, xmm2 ; c4 e2 7d 79 ca
+vpbroadcastw ymm1, word [rax] ; c4 e2 7d 79 08
+
+vpbroadcastd xmm1, xmm2 ; c4 e2 79 58 ca
+vpbroadcastd xmm1, dword [rax] ; c4 e2 79 58 08
+vpbroadcastd ymm1, xmm2 ; c4 e2 7d 58 ca
+vpbroadcastd ymm1, dword [rax] ; c4 e2 7d 58 08
+
+vpbroadcastq xmm1, xmm2 ; c4 e2 79 59 ca
+vpbroadcastq xmm1, qword [rax] ; c4 e2 79 59 08
+vpbroadcastq ymm1, xmm2 ; c4 e2 7d 59 ca
+vpbroadcastq ymm1, qword [rax] ; c4 e2 7d 59 08
+
+vpermd ymm1, ymm2, ymm3 ; c4 e2 6d 36 cb
+vpermd ymm1, ymm2, yword [rax] ; c4 e2 6d 36 08
+
+vpermpd ymm1, ymm2, 3 ; c4 e3 fd 01 ca 03
+vpermpd ymm1, yword [rax], 3 ; c4 e3 fd 01 08 03
+
+vpermps ymm1, ymm2, ymm3 ; c4 e2 6d 16 cb
+vpermps ymm1, ymm2, yword [rax] ; c4 e2 6d 16 08
+
+vpermq ymm1, ymm2, 3 ; c4 e3 fd 00 ca 03
+vpermq ymm1, yword [rax], 3 ; c4 e3 fd 00 08 03
+
+vperm2i128 ymm1, ymm2, ymm3, 3 ; c4 e3 6d 46 cb 03
+vperm2i128 ymm1, ymm2, yword [rax], 3 ; c4 e3 6d 46 08 03
+
+vextracti128 xmm1, ymm2, 3 ; c4 e3 7d 39 d1 03
+vextracti128 oword [rax], ymm2, 3 ; c4 e3 7d 39 10 03
+
+vinserti128 ymm1, ymm2, xmm3, 3 ; c4 e3 6d 38 cb 03
+vinserti128 ymm1, ymm2, oword [rax], 3 ; c4 e3 6d 38 08 03
+
+vpmaskmovd xmm1, xmm2, oword [rax] ; c4 e2 69 8c 08
+vpmaskmovd ymm1, ymm2, yword [rax] ; c4 e2 6d 8c 08
+vpmaskmovd oword [rax], xmm1, xmm2 ; c4 e2 71 8e 10
+vpmaskmovd yword [rax], ymm1, ymm2 ; c4 e2 75 8e 10
+
+vpmaskmovq xmm1, xmm2, oword [rax] ; c4 e2 e9 8c 08
+vpmaskmovq ymm1, ymm2, yword [rax] ; c4 e2 ed 8c 08
+vpmaskmovq oword [rax], xmm1, xmm2 ; c4 e2 f1 8e 10
+vpmaskmovq yword [rax], ymm1, ymm2 ; c4 e2 f5 8e 10
+
+vpsllvd xmm1, xmm2, xmm3 ; c4 e2 69 47 cb
+vpsllvd xmm1, xmm2, oword [rax] ; c4 e2 69 47 08
+vpsllvd ymm1, ymm2, ymm3 ; c4 e2 6d 47 cb
+vpsllvd ymm1, ymm2, yword [rax] ; c4 e2 6d 47 08
+
+vpsllvq xmm1, xmm2, xmm3 ; c4 e2 e9 47 cb
+vpsllvq xmm1, xmm2, oword [rax] ; c4 e2 e9 47 08
+vpsllvq ymm1, ymm2, ymm3 ; c4 e2 ed 47 cb
+vpsllvq ymm1, ymm2, yword [rax] ; c4 e2 ed 47 08
+
+vpsravd xmm1, xmm2, xmm3 ; c4 e2 69 46 cb
+vpsravd xmm1, xmm2, oword [rax] ; c4 e2 69 46 08
+vpsravd ymm1, ymm2, ymm3 ; c4 e2 6d 46 cb
+vpsravd ymm1, ymm2, yword [rax] ; c4 e2 6d 46 08
+
+vpsrlvd xmm1, xmm2, xmm3 ; c4 e2 69 45 cb
+vpsrlvd xmm1, xmm2, oword [rax] ; c4 e2 69 45 08
+vpsrlvd ymm1, ymm2, ymm3 ; c4 e2 6d 45 cb
+vpsrlvd ymm1, ymm2, yword [rax] ; c4 e2 6d 45 08
+
+vpsrlvq xmm1, xmm2, xmm3 ; c4 e2 e9 45 cb
+vpsrlvq xmm1, xmm2, oword [rax] ; c4 e2 e9 45 08
+vpsrlvq ymm1, ymm2, ymm3 ; c4 e2 ed 45 cb
+vpsrlvq ymm1, ymm2, yword [rax] ; c4 e2 ed 45 08
diff --git a/modules/arch/x86/tests/avx2.hex b/modules/arch/x86/tests/avx2.hex
new file mode 100644
index 00000000..3d9e9cd4
--- /dev/null
+++ b/modules/arch/x86/tests/avx2.hex
@@ -0,0 +1,2105 @@
+c4
+e3
+75
+42
+cb
+03
+c4
+e3
+75
+42
+08
+03
+c4
+e3
+6d
+42
+cb
+03
+c4
+e3
+6d
+42
+08
+03
+c4
+e2
+7d
+1c
+ca
+c4
+e2
+7d
+1c
+08
+c4
+e2
+7d
+1d
+ca
+c4
+e2
+7d
+1d
+08
+c4
+e2
+7d
+1e
+ca
+c4
+e2
+7d
+1e
+08
+c5
+f5
+63
+cb
+c5
+f5
+63
+08
+c5
+ed
+63
+cb
+c5
+ed
+63
+08
+c5
+f5
+6b
+cb
+c5
+f5
+6b
+08
+c5
+ed
+6b
+cb
+c5
+ed
+6b
+08
+c4
+e2
+75
+2b
+cb
+c4
+e2
+75
+2b
+08
+c4
+e2
+6d
+2b
+cb
+c4
+e2
+6d
+2b
+08
+c5
+f5
+67
+cb
+c5
+f5
+67
+08
+c5
+ed
+67
+cb
+c5
+ed
+67
+08
+c5
+f5
+fc
+cb
+c5
+f5
+fc
+08
+c5
+ed
+fc
+cb
+c5
+ed
+fc
+08
+c5
+f5
+fd
+cb
+c5
+f5
+fd
+08
+c5
+ed
+fd
+cb
+c5
+ed
+fd
+08
+c5
+f5
+fe
+cb
+c5
+f5
+fe
+08
+c5
+ed
+fe
+cb
+c5
+ed
+fe
+08
+c5
+f5
+d4
+cb
+c5
+f5
+d4
+08
+c5
+ed
+d4
+cb
+c5
+ed
+d4
+08
+c5
+f5
+ec
+cb
+c5
+f5
+ec
+08
+c5
+ed
+ec
+cb
+c5
+ed
+ec
+08
+c5
+f5
+ed
+cb
+c5
+f5
+ed
+08
+c5
+ed
+ed
+cb
+c5
+ed
+ed
+08
+c5
+f5
+dc
+cb
+c5
+f5
+dc
+08
+c5
+ed
+dc
+cb
+c5
+ed
+dc
+08
+c5
+f5
+dd
+cb
+c5
+f5
+dd
+08
+c5
+ed
+dd
+cb
+c5
+ed
+dd
+08
+c4
+e3
+6d
+0f
+cb
+03
+c4
+e3
+6d
+0f
+08
+03
+c5
+f5
+db
+cb
+c5
+f5
+db
+08
+c5
+ed
+db
+cb
+c5
+ed
+db
+08
+c5
+f5
+df
+cb
+c5
+f5
+df
+08
+c5
+ed
+df
+cb
+c5
+ed
+df
+08
+c5
+f5
+e0
+cb
+c5
+f5
+e0
+08
+c5
+ed
+e0
+cb
+c5
+ed
+e0
+08
+c5
+f5
+e3
+cb
+c5
+f5
+e3
+08
+c5
+ed
+e3
+cb
+c5
+ed
+e3
+08
+c4
+e3
+6d
+4c
+cb
+40
+c4
+e3
+6d
+4c
+08
+40
+c4
+e3
+75
+0e
+cb
+03
+c4
+e3
+75
+0e
+08
+03
+c4
+e3
+6d
+0e
+cb
+03
+c4
+e3
+6d
+0e
+08
+03
+c5
+f5
+74
+cb
+c5
+f5
+74
+08
+c5
+ed
+74
+cb
+c5
+ed
+74
+08
+c5
+f5
+75
+cb
+c5
+f5
+75
+08
+c5
+ed
+75
+cb
+c5
+ed
+75
+08
+c5
+f5
+76
+cb
+c5
+f5
+76
+08
+c5
+ed
+76
+cb
+c5
+ed
+76
+08
+c4
+e2
+75
+29
+cb
+c4
+e2
+75
+29
+08
+c4
+e2
+6d
+29
+cb
+c4
+e2
+6d
+29
+08
+c5
+f5
+64
+cb
+c5
+f5
+64
+08
+c5
+ed
+64
+cb
+c5
+ed
+64
+08
+c5
+f5
+65
+cb
+c5
+f5
+65
+08
+c5
+ed
+65
+cb
+c5
+ed
+65
+08
+c5
+f5
+66
+cb
+c5
+f5
+66
+08
+c5
+ed
+66
+cb
+c5
+ed
+66
+08
+c4
+e2
+75
+37
+cb
+c4
+e2
+75
+37
+08
+c4
+e2
+6d
+37
+cb
+c4
+e2
+6d
+37
+08
+c4
+e2
+75
+01
+cb
+c4
+e2
+75
+01
+08
+c4
+e2
+6d
+01
+cb
+c4
+e2
+6d
+01
+08
+c4
+e2
+75
+02
+cb
+c4
+e2
+75
+02
+08
+c4
+e2
+6d
+02
+cb
+c4
+e2
+6d
+02
+08
+c4
+e2
+75
+03
+cb
+c4
+e2
+75
+03
+08
+c4
+e2
+6d
+03
+cb
+c4
+e2
+6d
+03
+08
+c4
+e2
+75
+05
+cb
+c4
+e2
+75
+05
+08
+c4
+e2
+6d
+05
+cb
+c4
+e2
+6d
+05
+08
+c4
+e2
+75
+06
+cb
+c4
+e2
+75
+06
+08
+c4
+e2
+6d
+06
+cb
+c4
+e2
+6d
+06
+08
+c4
+e2
+75
+07
+cb
+c4
+e2
+75
+07
+08
+c4
+e2
+6d
+07
+cb
+c4
+e2
+6d
+07
+08
+c4
+e2
+75
+04
+cb
+c4
+e2
+75
+04
+08
+c4
+e2
+6d
+04
+cb
+c4
+e2
+6d
+04
+08
+c5
+f5
+f5
+cb
+c5
+f5
+f5
+08
+c5
+ed
+f5
+cb
+c5
+ed
+f5
+08
+c4
+e2
+75
+3c
+cb
+c4
+e2
+75
+3c
+08
+c4
+e2
+6d
+3c
+cb
+c4
+e2
+6d
+3c
+08
+c5
+f5
+ee
+cb
+c5
+f5
+ee
+08
+c5
+ed
+ee
+cb
+c5
+ed
+ee
+08
+c4
+e2
+75
+3d
+cb
+c4
+e2
+75
+3d
+08
+c4
+e2
+6d
+3d
+cb
+c4
+e2
+6d
+3d
+08
+c5
+f5
+de
+cb
+c5
+f5
+de
+08
+c5
+ed
+de
+cb
+c5
+ed
+de
+08
+c4
+e2
+75
+3e
+cb
+c4
+e2
+75
+3e
+08
+c4
+e2
+6d
+3e
+cb
+c4
+e2
+6d
+3e
+08
+c4
+e2
+75
+3f
+cb
+c4
+e2
+75
+3f
+08
+c4
+e2
+6d
+3f
+cb
+c4
+e2
+6d
+3f
+08
+c4
+e2
+75
+38
+cb
+c4
+e2
+75
+38
+08
+c4
+e2
+6d
+38
+cb
+c4
+e2
+6d
+38
+08
+c5
+f5
+ea
+cb
+c5
+f5
+ea
+08
+c5
+ed
+ea
+cb
+c5
+ed
+ea
+08
+c4
+e2
+75
+39
+cb
+c4
+e2
+75
+39
+08
+c4
+e2
+6d
+39
+cb
+c4
+e2
+6d
+39
+08
+c5
+f5
+da
+cb
+c5
+f5
+da
+08
+c5
+ed
+da
+cb
+c5
+ed
+da
+08
+c4
+e2
+75
+3a
+cb
+c4
+e2
+75
+3a
+08
+c4
+e2
+6d
+3a
+cb
+c4
+e2
+6d
+3a
+08
+c4
+e2
+75
+3b
+cb
+c4
+e2
+75
+3b
+08
+c4
+e2
+6d
+3b
+cb
+c4
+e2
+6d
+3b
+08
+c5
+fd
+d7
+c1
+c5
+fd
+d7
+c1
+c4
+e2
+7d
+20
+ca
+c4
+e2
+7d
+20
+08
+c4
+e2
+7d
+20
+08
+c4
+e2
+7d
+21
+ca
+c4
+e2
+7d
+21
+08
+c4
+e2
+7d
+21
+08
+c4
+e2
+7d
+22
+ca
+c4
+e2
+7d
+22
+08
+c4
+e2
+7d
+22
+08
+c4
+e2
+7d
+23
+ca
+c4
+e2
+7d
+23
+08
+c4
+e2
+7d
+23
+08
+c4
+e2
+7d
+24
+ca
+c4
+e2
+7d
+24
+08
+c4
+e2
+7d
+24
+08
+c4
+e2
+7d
+25
+ca
+c4
+e2
+7d
+25
+08
+c4
+e2
+7d
+25
+08
+c4
+e2
+7d
+30
+ca
+c4
+e2
+7d
+30
+08
+c4
+e2
+7d
+30
+08
+c4
+e2
+7d
+31
+ca
+c4
+e2
+7d
+31
+08
+c4
+e2
+7d
+31
+08
+c4
+e2
+7d
+32
+ca
+c4
+e2
+7d
+32
+08
+c4
+e2
+7d
+32
+08
+c4
+e2
+7d
+33
+ca
+c4
+e2
+7d
+33
+08
+c4
+e2
+7d
+33
+08
+c4
+e2
+7d
+34
+ca
+c4
+e2
+7d
+34
+08
+c4
+e2
+7d
+34
+08
+c4
+e2
+7d
+35
+ca
+c4
+e2
+7d
+35
+08
+c4
+e2
+7d
+35
+08
+c4
+e2
+75
+28
+cb
+c4
+e2
+75
+28
+08
+c4
+e2
+6d
+28
+cb
+c4
+e2
+6d
+28
+08
+c4
+e2
+75
+0b
+cb
+c4
+e2
+75
+0b
+08
+c4
+e2
+6d
+0b
+cb
+c4
+e2
+6d
+0b
+08
+c5
+f5
+e4
+cb
+c5
+f5
+e4
+08
+c5
+ed
+e4
+cb
+c5
+ed
+e4
+08
+c5
+f5
+e5
+cb
+c5
+f5
+e5
+08
+c5
+ed
+e5
+cb
+c5
+ed
+e5
+08
+c5
+f5
+d5
+cb
+c5
+f5
+d5
+08
+c5
+ed
+d5
+cb
+c5
+ed
+d5
+08
+c4
+e2
+75
+40
+cb
+c4
+e2
+75
+40
+08
+c4
+e2
+6d
+40
+cb
+c4
+e2
+6d
+40
+08
+c5
+f5
+f4
+cb
+c5
+f5
+f4
+08
+c5
+ed
+f4
+cb
+c5
+ed
+f4
+08
+c5
+f5
+eb
+cb
+c5
+f5
+eb
+08
+c5
+ed
+eb
+cb
+c5
+ed
+eb
+08
+c5
+f5
+f6
+cb
+c5
+f5
+f6
+08
+c5
+ed
+f6
+cb
+c5
+ed
+f6
+08
+c4
+e2
+75
+00
+cb
+c4
+e2
+75
+00
+08
+c4
+e2
+6d
+00
+cb
+c4
+e2
+6d
+00
+08
+c5
+fd
+70
+cb
+03
+c5
+fd
+70
+08
+03
+c5
+fe
+70
+cb
+03
+c5
+fe
+70
+08
+03
+c5
+ff
+70
+cb
+03
+c5
+ff
+70
+08
+03
+c4
+e2
+75
+08
+cb
+c4
+e2
+75
+08
+08
+c4
+e2
+6d
+08
+cb
+c4
+e2
+6d
+08
+08
+c4
+e2
+75
+09
+cb
+c4
+e2
+75
+09
+08
+c4
+e2
+6d
+09
+cb
+c4
+e2
+6d
+09
+08
+c4
+e2
+75
+0a
+cb
+c4
+e2
+75
+0a
+08
+c4
+e2
+6d
+0a
+cb
+c4
+e2
+6d
+0a
+08
+c5
+f5
+73
+f9
+03
+c5
+f5
+73
+fa
+03
+c5
+f5
+f1
+cb
+c5
+f5
+f1
+08
+c5
+f5
+71
+f1
+03
+c5
+ed
+f1
+cb
+c5
+ed
+f1
+08
+c5
+f5
+71
+f2
+03
+c5
+f5
+f2
+cb
+c5
+f5
+f2
+08
+c5
+f5
+72
+f1
+03
+c5
+ed
+f2
+cb
+c5
+ed
+f2
+08
+c5
+f5
+72
+f2
+03
+c5
+f5
+f3
+cb
+c5
+f5
+f3
+08
+c5
+f5
+73
+f1
+03
+c5
+ed
+f3
+cb
+c5
+ed
+f3
+08
+c5
+f5
+73
+f2
+03
+c5
+f5
+e1
+cb
+c5
+f5
+e1
+08
+c5
+f5
+71
+e1
+03
+c5
+ed
+e1
+cb
+c5
+ed
+e1
+08
+c5
+f5
+71
+e2
+03
+c5
+f5
+e2
+cb
+c5
+f5
+e2
+08
+c5
+f5
+72
+e1
+03
+c5
+ed
+e2
+cb
+c5
+ed
+e2
+08
+c5
+f5
+72
+e2
+03
+c5
+f5
+73
+d9
+03
+c5
+f5
+73
+da
+03
+c5
+f5
+d1
+cb
+c5
+f5
+d1
+08
+c5
+f5
+71
+d1
+03
+c5
+ed
+d1
+cb
+c5
+ed
+d1
+08
+c5
+f5
+71
+d2
+03
+c5
+f5
+d2
+cb
+c5
+f5
+d2
+08
+c5
+f5
+72
+d1
+03
+c5
+ed
+d2
+cb
+c5
+ed
+d2
+08
+c5
+f5
+72
+d2
+03
+c5
+f5
+d2
+cb
+c5
+f5
+d2
+08
+c5
+f5
+72
+d1
+03
+c5
+ed
+d2
+cb
+c5
+ed
+d2
+08
+c5
+f5
+72
+d2
+03
+c5
+f5
+e8
+cb
+c5
+f5
+e8
+08
+c5
+ed
+e8
+cb
+c5
+ed
+e8
+08
+c5
+f5
+e9
+cb
+c5
+f5
+e9
+08
+c5
+ed
+e9
+cb
+c5
+ed
+e9
+08
+c5
+f5
+d8
+cb
+c5
+f5
+d8
+08
+c5
+ed
+d8
+cb
+c5
+ed
+d8
+08
+c5
+f5
+d9
+cb
+c5
+f5
+d9
+08
+c5
+ed
+d9
+cb
+c5
+ed
+d9
+08
+c5
+f5
+68
+cb
+c5
+f5
+68
+08
+c5
+ed
+68
+cb
+c5
+ed
+68
+08
+c5
+f5
+69
+cb
+c5
+f5
+69
+08
+c5
+ed
+69
+cb
+c5
+ed
+69
+08
+c5
+f5
+6a
+cb
+c5
+f5
+6a
+08
+c5
+ed
+6a
+cb
+c5
+ed
+6a
+08
+c5
+f5
+6d
+cb
+c5
+f5
+6d
+08
+c5
+ed
+6d
+cb
+c5
+ed
+6d
+08
+c5
+f5
+60
+cb
+c5
+f5
+60
+08
+c5
+ed
+60
+cb
+c5
+ed
+60
+08
+c5
+f5
+61
+cb
+c5
+f5
+61
+08
+c5
+ed
+61
+cb
+c5
+ed
+61
+08
+c5
+f5
+62
+cb
+c5
+f5
+62
+08
+c5
+ed
+62
+cb
+c5
+ed
+62
+08
+c5
+f5
+6c
+cb
+c5
+f5
+6c
+08
+c5
+ed
+6c
+cb
+c5
+ed
+6c
+08
+c5
+f5
+ef
+cb
+c5
+f5
+ef
+08
+c5
+ed
+ef
+cb
+c5
+ed
+ef
+08
+c4
+e2
+7d
+2a
+08
+c4
+e2
+79
+18
+ca
+c4
+e2
+7d
+18
+ca
+c4
+e2
+7d
+19
+ca
+c4
+e2
+7d
+5a
+08
+c4
+e3
+6d
+02
+cb
+03
+c4
+e3
+6d
+02
+08
+03
+c4
+e2
+79
+78
+ca
+c4
+e2
+79
+78
+08
+c4
+e2
+7d
+78
+ca
+c4
+e2
+7d
+78
+08
+c4
+e2
+79
+79
+ca
+c4
+e2
+79
+79
+08
+c4
+e2
+7d
+79
+ca
+c4
+e2
+7d
+79
+08
+c4
+e2
+79
+58
+ca
+c4
+e2
+79
+58
+08
+c4
+e2
+7d
+58
+ca
+c4
+e2
+7d
+58
+08
+c4
+e2
+79
+59
+ca
+c4
+e2
+79
+59
+08
+c4
+e2
+7d
+59
+ca
+c4
+e2
+7d
+59
+08
+c4
+e2
+6d
+36
+cb
+c4
+e2
+6d
+36
+08
+c4
+e3
+fd
+01
+ca
+03
+c4
+e3
+fd
+01
+08
+03
+c4
+e2
+6d
+16
+cb
+c4
+e2
+6d
+16
+08
+c4
+e3
+fd
+00
+ca
+03
+c4
+e3
+fd
+00
+08
+03
+c4
+e3
+6d
+46
+cb
+03
+c4
+e3
+6d
+46
+08
+03
+c4
+e3
+7d
+39
+d1
+03
+c4
+e3
+7d
+39
+10
+03
+c4
+e3
+6d
+38
+cb
+03
+c4
+e3
+6d
+38
+08
+03
+c4
+e2
+69
+8c
+08
+c4
+e2
+6d
+8c
+08
+c4
+e2
+71
+8e
+10
+c4
+e2
+75
+8e
+10
+c4
+e2
+e9
+8c
+08
+c4
+e2
+ed
+8c
+08
+c4
+e2
+f1
+8e
+10
+c4
+e2
+f5
+8e
+10
+c4
+e2
+69
+47
+cb
+c4
+e2
+69
+47
+08
+c4
+e2
+6d
+47
+cb
+c4
+e2
+6d
+47
+08
+c4
+e2
+e9
+47
+cb
+c4
+e2
+e9
+47
+08
+c4
+e2
+ed
+47
+cb
+c4
+e2
+ed
+47
+08
+c4
+e2
+69
+46
+cb
+c4
+e2
+69
+46
+08
+c4
+e2
+6d
+46
+cb
+c4
+e2
+6d
+46
+08
+c4
+e2
+69
+45
+cb
+c4
+e2
+69
+45
+08
+c4
+e2
+6d
+45
+cb
+c4
+e2
+6d
+45
+08
+c4
+e2
+e9
+45
+cb
+c4
+e2
+e9
+45
+08
+c4
+e2
+ed
+45
+cb
+c4
+e2
+ed
+45
+08
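
Note on the two new test files: avx2.asm lists each AVX2 instruction form with its expected encoding in a trailing comment, and avx2.hex carries the same expected encodings as one hex byte per line, in the same order. The sketch below is a minimal stand-alone cross-check of that pairing, assuming yasm is on PATH, that avx2.asm selects 64-bit mode, and that raw `-f bin` output is byte-for-byte comparable to the golden list; it is an illustration only, not yasm's actual test harness.

#!/usr/bin/env python3
# Hypothetical cross-check of the avx2.asm / avx2.hex pair: assemble the .asm
# with yasm and verify the raw binary output matches the golden byte list.
# Assumes yasm is installed and that avx2.asm sets BITS 64 itself.
import os
import subprocess
import sys
import tempfile

def check(asm_path, hex_path):
    # Assemble to a flat binary in a temporary file.
    with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
        out_bin = tmp.name
    try:
        subprocess.run(["yasm", "-f", "bin", "-o", out_bin, asm_path], check=True)
        with open(out_bin, "rb") as f:
            got = f.read()
    finally:
        os.unlink(out_bin)

    # avx2.hex stores one hex byte per line.
    with open(hex_path) as f:
        want = bytes(int(line, 16) for line in f if line.strip())

    if got != want:
        # Report the first diverging offset to ease debugging.
        limit = min(len(got), len(want))
        n = next((i for i in range(limit) if got[i] != want[i]), limit)
        sys.exit("mismatch at byte offset %d (got %d bytes, want %d)"
                 % (n, len(got), len(want)))
    print("OK: %d bytes match" % len(got))

if __name__ == "__main__":
    check("modules/arch/x86/tests/avx2.asm",
          "modules/arch/x86/tests/avx2.hex")

Run from the top of the source tree, this prints the matched byte count on success or exits with the first mismatching offset, which points straight at the offending instruction form in avx2.asm.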