aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Johnson <peter@tortall.net> 2011-07-03 19:44:42 -0700
committer: Peter Johnson <peter@tortall.net> 2011-07-03 19:54:45 -0700
commit: ab721f347d281a430b4fe19cb2025151bcb7ef8a (patch)
tree: df79b39bd871be9c4591e70087ed514263fdf462
parent: d779fcb04e7b47b6054483a498ec3ad77428bb24 (diff)
download: yasm-ab721f347d281a430b4fe19cb2025151bcb7ef8a.tar.gz
Add AVX2 VGATHER* and VPGATHER* instructions.
These instructions use "VSIB" encoding, which takes the place of the usual SIB encoding. Several tests cover various legal and illegal modes. Last part of [#227 state:resolved].
-rwxr-xr-x modules/arch/x86/gen_x86_insn.py 93
-rw-r--r-- modules/arch/x86/tests/Makefile.inc 6
-rw-r--r-- modules/arch/x86/tests/avx2.asm 40
-rw-r--r-- modules/arch/x86/tests/avx2.hex 192
-rw-r--r-- modules/arch/x86/tests/vsib-err.asm 14
-rw-r--r-- modules/arch/x86/tests/vsib-err.errwarn 7
-rw-r--r-- modules/arch/x86/tests/vsib.asm 114
-rw-r--r-- modules/arch/x86/tests/vsib.hex 662
-rw-r--r-- modules/arch/x86/tests/vsib2-err.asm 19
-rw-r--r-- modules/arch/x86/tests/vsib2-err.errwarn 7
-rw-r--r-- modules/arch/x86/x86arch.h 3
-rw-r--r-- modules/arch/x86/x86bc.c 3
-rw-r--r-- modules/arch/x86/x86expr.c 76
-rw-r--r-- modules/arch/x86/x86id.c 67
14 files changed, 1283 insertions, 20 deletions
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index 5dec9e81..985b3733 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -7027,6 +7027,99 @@ for sz in [128, 256]:
add_insn("vpblendd", "vex_66_0F3A_imm8_avx2", modifiers=[0x02])
+# AVX2 gathers (VSIB encoding): the EA operand carries a vector (XMM/YMM) index register.
+add_group("gather_32x_32x",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=128,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="VEX")])
+add_group("gather_32x_32x",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=256, dest="VEX")])
+add_insn("vgatherdpd", "gather_32x_32x", modifiers=[0x92])
+add_insn("vpgatherdq", "gather_32x_32x", modifiers=[0x90])
+
+add_group("gather_64x_64y",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=128,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="VEX")])
+add_group("gather_64x_64y",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ vexw=1,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="MemYMMIndex", size=64, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=256, dest="VEX")])
+add_insn("vgatherqpd", "gather_64x_64y", modifiers=[0x93])
+add_insn("vpgatherqq", "gather_64x_64y", modifiers=[0x91])
+
+add_group("gather_32x_32y",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=128,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="VEX")])
+add_group("gather_32x_32y",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+ Operand(type="MemYMMIndex", size=32, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=256, dest="VEX")])
+add_insn("vgatherdps", "gather_32x_32y", modifiers=[0x92])
+add_insn("vpgatherdd", "gather_32x_32y", modifiers=[0x90])
+
+add_group("gather_64x_64y_128",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=128,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="VEX")])
+add_group("gather_64x_64y_128",
+ cpu=["AVX2"],
+ modifiers=["Op2Add"],
+ vex=256,
+ vexw=0,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="MemYMMIndex", size=64, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="VEX")])
+add_insn("vgatherqps", "gather_64x_64y_128", modifiers=[0x93])
+add_insn("vpgatherqd", "gather_64x_64y_128", modifiers=[0x91])
+
#####################################################################
# Intel FMA instructions
#####################################################################
diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc
index 7aebd992..07b13a80 100644
--- a/modules/arch/x86/tests/Makefile.inc
+++ b/modules/arch/x86/tests/Makefile.inc
@@ -237,6 +237,12 @@ EXTRA_DIST += modules/arch/x86/tests/vmx.asm
EXTRA_DIST += modules/arch/x86/tests/vmx.hex
EXTRA_DIST += modules/arch/x86/tests/vmx-err.asm
EXTRA_DIST += modules/arch/x86/tests/vmx-err.errwarn
+EXTRA_DIST += modules/arch/x86/tests/vsib.asm
+EXTRA_DIST += modules/arch/x86/tests/vsib.hex
+EXTRA_DIST += modules/arch/x86/tests/vsib-err.asm
+EXTRA_DIST += modules/arch/x86/tests/vsib-err.errwarn
+EXTRA_DIST += modules/arch/x86/tests/vsib2-err.asm
+EXTRA_DIST += modules/arch/x86/tests/vsib2-err.errwarn
EXTRA_DIST += modules/arch/x86/tests/x86label.asm
EXTRA_DIST += modules/arch/x86/tests/x86label.hex
EXTRA_DIST += modules/arch/x86/tests/xchg64.asm
diff --git a/modules/arch/x86/tests/avx2.asm b/modules/arch/x86/tests/avx2.asm
index acf17ade..2806d23c 100644
--- a/modules/arch/x86/tests/avx2.asm
+++ b/modules/arch/x86/tests/avx2.asm
@@ -610,3 +610,43 @@ vpsrlvq xmm1, xmm2, xmm3 ; c4 e2 e9 45 cb
vpsrlvq xmm1, xmm2, oword [rax] ; c4 e2 e9 45 08
vpsrlvq ymm1, ymm2, ymm3 ; c4 e2 ed 45 cb
vpsrlvq ymm1, ymm2, yword [rax] ; c4 e2 ed 45 08
+
+vgatherdpd xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 92 0c 08
+vgatherdpd xmm1, dword [rax+xmm1], xmm2 ; c4 e2 e9 92 0c 08
+vgatherdpd ymm1, [rax+xmm1], ymm2 ; c4 e2 ed 92 0c 08
+vgatherdpd ymm1, dword [rax+xmm1], ymm2 ; c4 e2 ed 92 0c 08
+
+vgatherqpd xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 93 0c 08
+vgatherqpd xmm1, qword [rax+xmm1], xmm2 ; c4 e2 e9 93 0c 08
+vgatherqpd ymm1, [rax+ymm1], ymm2 ; c4 e2 ed 93 0c 08
+vgatherqpd ymm1, qword [rax+ymm1], ymm2 ; c4 e2 ed 93 0c 08
+
+vgatherdps xmm1, [rax+xmm1], xmm2 ; c4 e2 69 92 0c 08
+vgatherdps xmm1, dword [rax+xmm1], xmm2 ; c4 e2 69 92 0c 08
+vgatherdps ymm1, [rax+ymm1], ymm2 ; c4 e2 6d 92 0c 08
+vgatherdps ymm1, dword [rax+ymm1], ymm2 ; c4 e2 6d 92 0c 08
+
+vgatherqps xmm1, [rax+xmm1], xmm2 ; c4 e2 69 93 0c 08
+vgatherqps xmm1, qword [rax+xmm1], xmm2 ; c4 e2 69 93 0c 08
+vgatherqps xmm1, [rax+ymm1], xmm2 ; c4 e2 6d 93 0c 08
+vgatherqps xmm1, qword [rax+ymm1], xmm2 ; c4 e2 6d 93 0c 08
+
+vpgatherdd xmm1, [rax+xmm1], xmm2 ; c4 e2 69 90 0c 08
+vpgatherdd xmm1, dword [rax+xmm1], xmm2 ; c4 e2 69 90 0c 08
+vpgatherdd ymm1, [rax+ymm1], ymm2 ; c4 e2 6d 90 0c 08
+vpgatherdd ymm1, dword [rax+ymm1], ymm2 ; c4 e2 6d 90 0c 08
+
+vpgatherqd xmm1, [rax+xmm1], xmm2 ; c4 e2 69 91 0c 08
+vpgatherqd xmm1, qword [rax+xmm1], xmm2 ; c4 e2 69 91 0c 08
+vpgatherqd xmm1, [rax+ymm1], xmm2 ; c4 e2 6d 91 0c 08
+vpgatherqd xmm1, qword [rax+ymm1], xmm2 ; c4 e2 6d 91 0c 08
+
+vpgatherdq xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 90 0c 08
+vpgatherdq xmm1, dword [rax+xmm1], xmm2 ; c4 e2 e9 90 0c 08
+vpgatherdq ymm1, [rax+xmm1], ymm2 ; c4 e2 ed 90 0c 08
+vpgatherdq ymm1, dword [rax+xmm1], ymm2 ; c4 e2 ed 90 0c 08
+
+vpgatherqq xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 91 0c 08
+vpgatherqq xmm1, qword [rax+xmm1], xmm2 ; c4 e2 e9 91 0c 08
+vpgatherqq ymm1, [rax+ymm1], ymm2 ; c4 e2 ed 91 0c 08
+vpgatherqq ymm1, qword [rax+ymm1], ymm2 ; c4 e2 ed 91 0c 08
diff --git a/modules/arch/x86/tests/avx2.hex b/modules/arch/x86/tests/avx2.hex
index 3d9e9cd4..17f84bd3 100644
--- a/modules/arch/x86/tests/avx2.hex
+++ b/modules/arch/x86/tests/avx2.hex
@@ -2103,3 +2103,195 @@ e2
ed
45
08
+c4
+e2
+e9
+92
+0c
+08
+c4
+e2
+e9
+92
+0c
+08
+c4
+e2
+ed
+92
+0c
+08
+c4
+e2
+ed
+92
+0c
+08
+c4
+e2
+e9
+93
+0c
+08
+c4
+e2
+e9
+93
+0c
+08
+c4
+e2
+ed
+93
+0c
+08
+c4
+e2
+ed
+93
+0c
+08
+c4
+e2
+69
+92
+0c
+08
+c4
+e2
+69
+92
+0c
+08
+c4
+e2
+6d
+92
+0c
+08
+c4
+e2
+6d
+92
+0c
+08
+c4
+e2
+69
+93
+0c
+08
+c4
+e2
+69
+93
+0c
+08
+c4
+e2
+6d
+93
+0c
+08
+c4
+e2
+6d
+93
+0c
+08
+c4
+e2
+69
+90
+0c
+08
+c4
+e2
+69
+90
+0c
+08
+c4
+e2
+6d
+90
+0c
+08
+c4
+e2
+6d
+90
+0c
+08
+c4
+e2
+69
+91
+0c
+08
+c4
+e2
+69
+91
+0c
+08
+c4
+e2
+6d
+91
+0c
+08
+c4
+e2
+6d
+91
+0c
+08
+c4
+e2
+e9
+90
+0c
+08
+c4
+e2
+e9
+90
+0c
+08
+c4
+e2
+ed
+90
+0c
+08
+c4
+e2
+ed
+90
+0c
+08
+c4
+e2
+e9
+91
+0c
+08
+c4
+e2
+e9
+91
+0c
+08
+c4
+e2
+ed
+91
+0c
+08
+c4
+e2
+ed
+91
+0c
+08
diff --git a/modules/arch/x86/tests/vsib-err.asm b/modules/arch/x86/tests/vsib-err.asm
new file mode 100644
index 00000000..ee306038
--- /dev/null
+++ b/modules/arch/x86/tests/vsib-err.asm
@@ -0,0 +1,14 @@
+; Errors caught during instruction matching
+
+[bits 64]
+
+vpgatherdq xmm0,xmm0,xmm0 ; no reg EA template
+
+vpgatherdq xmm0,[ymm0],xmm0 ; not a VSIB256 template
+vpgatherqq ymm0,[xmm0],ymm0 ; not a VSIB128 template
+
+vpgatherdq xmm0,[rel 0],xmm0
+vpgatherdq xmm0,[0],xmm0
+vpgatherdq xmm0,[rax],xmm0
+vpgatherdq xmm0,[rax+rbx],xmm0
+
diff --git a/modules/arch/x86/tests/vsib-err.errwarn b/modules/arch/x86/tests/vsib-err.errwarn
new file mode 100644
index 00000000..8d2d084f
--- /dev/null
+++ b/modules/arch/x86/tests/vsib-err.errwarn
@@ -0,0 +1,7 @@
+-:5: error: invalid combination of opcode and operands
+-:7: error: invalid combination of opcode and operands
+-:8: error: invalid combination of opcode and operands
+-:10: error: invalid combination of opcode and operands
+-:11: error: invalid combination of opcode and operands
+-:12: error: invalid combination of opcode and operands
+-:13: error: invalid combination of opcode and operands
diff --git a/modules/arch/x86/tests/vsib.asm b/modules/arch/x86/tests/vsib.asm
new file mode 100644
index 00000000..fc2a705b
--- /dev/null
+++ b/modules/arch/x86/tests/vsib.asm
@@ -0,0 +1,114 @@
+[bits 16]
+; test promotion to 32-bit address size
+vpgatherdq xmm0,[xmm0],xmm0 ; 67 c4 e2 f9 90 04 05 00 00 00 00
+vpgatherqq ymm0,[ymm0],ymm0 ; 67 c4 e2 fd 91 04 05 00 00 00 00
+
+[bits 32]
+; test promotion from base to index
+vpgatherdq xmm0,[xmm0],xmm0 ; c4 e2 f9 90 04 05 00 00 00 00
+vpgatherqq ymm0,[ymm0],ymm0 ; c4 e2 fd 91 04 05 00 00 00 00
+
+; various combinations
+vpgatherdq xmm0,[ecx+xmm5],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[ecx+ymm5],ymm0 ; c4 e2 fd 91 04 29
+vpgatherdq xmm0,[ebp+xmm5],xmm0 ; c4 e2 f9 90 44 2d 00
+vpgatherqq ymm0,[ebp+ymm5],ymm0 ; c4 e2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[xmm5+ecx],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[ymm5+ecx],ymm0 ; c4 e2 fd 91 04 29
+vpgatherdq xmm0,[xmm5+ebp],xmm0 ; c4 e2 f9 90 44 2d 00
+vpgatherqq ymm0,[ymm5+ebp],ymm0 ; c4 e2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[ecx+xmm5*1],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[ecx+ymm5*1],ymm0 ; c4 e2 fd 91 04 29
+vpgatherdq xmm0,[ebp+xmm5*1],xmm0 ; c4 e2 f9 90 44 2d 00
+vpgatherqq ymm0,[ebp+ymm5*1],ymm0 ; c4 e2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[xmm5+ecx*1],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[ymm5+ecx*1],ymm0 ; c4 e2 fd 91 04 29
+vpgatherdq xmm0,[xmm5+ebp*1],xmm0 ; c4 e2 f9 90 44 2d 00
+vpgatherqq ymm0,[ymm5+ebp*1],ymm0 ; c4 e2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[nosplit 12345678h + xmm5*1],xmm0; c4 e2 f9 90 04 2d 78 56 34 12
+vpgatherqq ymm0,[nosplit 12345678h + ymm5*1],ymm0; c4 e2 fd 91 04 2d 78 56 34 12
+
+vpgatherdq xmm0,[byte ecx + 12 + xmm5*2],xmm0 ; c4 e2 f9 90 44 69 0c
+vpgatherqq ymm0,[byte ecx + 12 + ymm5*2],ymm0 ; c4 e2 fd 91 44 69 0c
+vpgatherdq xmm0,[byte ebp + 12 + xmm5*2],xmm0 ; c4 e2 f9 90 44 6d 0c
+vpgatherqq ymm0,[byte ebp + 12 + ymm5*2],ymm0 ; c4 e2 fd 91 44 6d 0c
+
+vpgatherdq xmm0,[dword ecx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 0c 00 00 00
+vpgatherqq ymm0,[dword ecx + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 84 a9 0c 00 00 00
+vpgatherdq xmm0,[dword ebp + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 84 ad 0c 00 00 00
+vpgatherqq ymm0,[dword ebp + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 84 ad 0c 00 00 00
+
+vpgatherdq xmm0,[ecx + 12345678h + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 78 56 34 12
+vpgatherqq ymm0,[ecx + 12345678h + ymm5*4],ymm0 ; c4 e2 fd 91 84 a9 78 56 34 12
+vpgatherdq xmm0,[ebp + 12345678h + xmm5*4],xmm0 ; c4 e2 f9 90 84 ad 78 56 34 12
+vpgatherqq ymm0,[ebp + 12345678h + ymm5*4],ymm0 ; c4 e2 fd 91 84 ad 78 56 34 12
+
+vpgatherdq xmm0,[ecx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 44 a9 0c
+vpgatherqq ymm0,[ecx + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 44 a9 0c
+vpgatherdq xmm0,[ebp + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 44 ad 0c
+vpgatherqq ymm0,[ebp + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 44 ad 0c
+
+vpgatherdq xmm0,[dword 12 + xmm5*8],xmm0 ; c4 e2 f9 90 04 ed 0c 00 00 00
+vpgatherqq ymm0,[dword 12 + ymm5*8],ymm0 ; c4 e2 fd 91 04 ed 0c 00 00 00
+vpgatherdq xmm0,[12 + xmm5*8],xmm0 ; c4 e2 f9 90 04 ed 0c 00 00 00
+vpgatherqq ymm0,[12 + ymm5*8],ymm0 ; c4 e2 fd 91 04 ed 0c 00 00 00
+
+[bits 64]
+; test promotion from base to index
+vpgatherdq xmm0,[xmm0],xmm0 ; c4 e2 f9 90 04 05 00 00 00 00
+vpgatherqq ymm0,[ymm0],ymm0 ; c4 e2 fd 91 04 05 00 00 00 00
+
+; various combinations
+vpgatherdq xmm0,[rcx+xmm5],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[rcx+ymm13],ymm0 ; c4 a2 fd 91 04 29
+vpgatherdq xmm0,[r13+xmm13],xmm0 ; c4 82 f9 90 44 2d 00
+vpgatherqq ymm0,[r13+ymm5],ymm0 ; c4 c2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[xmm5+rcx],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[ymm13+rcx],ymm0 ; c4 a2 fd 91 04 29
+vpgatherdq xmm0,[xmm13+r13],xmm0 ; c4 82 f9 90 44 2d 00
+vpgatherqq ymm0,[ymm5+r13],ymm0 ; c4 c2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[rcx+xmm5*1],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[rcx+ymm13*1],ymm0 ; c4 a2 fd 91 04 29
+vpgatherdq xmm0,[r13+xmm13*1],xmm0 ; c4 82 f9 90 44 2d 00
+vpgatherqq ymm0,[r13+ymm5*1],ymm0 ; c4 c2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[xmm5+rcx*1],xmm0 ; c4 e2 f9 90 04 29
+vpgatherqq ymm0,[ymm13+rcx*1],ymm0 ; c4 a2 fd 91 04 29
+vpgatherdq xmm0,[xmm13+r13*1],xmm0 ; c4 82 f9 90 44 2d 00
+vpgatherqq ymm0,[ymm5+r13*1],ymm0 ; c4 c2 fd 91 44 2d 00
+
+vpgatherdq xmm0,[nosplit 12345678h + xmm5*1],xmm0; c4 e2 f9 90 04 2d 78 56 34 12
+vpgatherqq ymm0,[nosplit 12345678h + ymm5*1],ymm0; c4 e2 fd 91 04 2d 78 56 34 12
+
+vpgatherdq xmm0,[byte rcx + 12 + xmm5*2],xmm0 ; c4 e2 f9 90 44 69 0c
+vpgatherqq ymm0,[byte rcx + 12 + ymm13*2],ymm0 ; c4 a2 fd 91 44 69 0c
+vpgatherdq xmm0,[byte r13 + 12 + xmm13*2],xmm0 ; c4 82 f9 90 44 6d 0c
+vpgatherqq ymm0,[byte r13 + 12 + ymm5*2],ymm0 ; c4 c2 fd 91 44 6d 0c
+
+vpgatherdq xmm0,[dword rcx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 0c 00 00 00
+vpgatherqq ymm0,[dword rcx + 12 + ymm13*4],ymm0 ; c4 a2 fd 91 84 a9 0c 00 00 00
+vpgatherdq xmm0,[dword r13 + 12 + xmm13*4],xmm0 ; c4 82 f9 90 84 ad 0c 00 00 00
+vpgatherqq ymm0,[dword r13 + 12 + ymm5*4],ymm0 ; c4 c2 fd 91 84 ad 0c 00 00 00
+
+vpgatherdq xmm0,[rcx + 12345678h + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 78 56 34 12
+vpgatherqq ymm0,[rcx + 12345678h + ymm13*4],ymm0; c4 a2 fd 91 84 a9 78 56 34 12
+vpgatherdq xmm0,[r13 + 12345678h + xmm13*4],xmm0; c4 82 f9 90 84 ad 78 56 34 12
+vpgatherqq ymm0,[r13 + 12345678h + ymm5*4],ymm0 ; c4 c2 fd 91 84 ad 78 56 34 12
+
+vpgatherdq xmm0,[rcx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 44 a9 0c
+vpgatherqq ymm0,[rcx + 12 + ymm13*4],ymm0 ; c4 a2 fd 91 44 a9 0c
+vpgatherdq xmm0,[r13 + 12 + xmm13*4],xmm0 ; c4 82 f9 90 44 ad 0c
+vpgatherqq ymm0,[r13 + 12 + ymm5*4],ymm0 ; c4 c2 fd 91 44 ad 0c
+
+vpgatherdq xmm0,[dword 12 + xmm5*8],xmm0 ; c4 e2 f9 90 04 ed 0c 00 00 00
+vpgatherqq ymm0,[dword 12 + ymm13*8],ymm0 ; c4 a2 fd 91 04 ed 0c 00 00 00
+vpgatherdq xmm0,[12 + xmm13*8],xmm0 ; c4 a2 f9 90 04 ed 0c 00 00 00
+vpgatherqq ymm0,[12 + ymm5*8],ymm0 ; c4 e2 fd 91 04 ed 0c 00 00 00
+
+
diff --git a/modules/arch/x86/tests/vsib.hex b/modules/arch/x86/tests/vsib.hex
new file mode 100644
index 00000000..7dfde886
--- /dev/null
+++ b/modules/arch/x86/tests/vsib.hex
@@ -0,0 +1,662 @@
+67
+c4
+e2
+f9
+90
+04
+05
+00
+00
+00
+00
+67
+c4
+e2
+fd
+91
+04
+05
+00
+00
+00
+00
+c4
+e2
+f9
+90
+04
+05
+00
+00
+00
+00
+c4
+e2
+fd
+91
+04
+05
+00
+00
+00
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+e2
+fd
+91
+04
+29
+c4
+e2
+f9
+90
+44
+2d
+00
+c4
+e2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+e2
+fd
+91
+04
+29
+c4
+e2
+f9
+90
+44
+2d
+00
+c4
+e2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+e2
+fd
+91
+04
+29
+c4
+e2
+f9
+90
+44
+2d
+00
+c4
+e2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+e2
+fd
+91
+04
+29
+c4
+e2
+f9
+90
+44
+2d
+00
+c4
+e2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+2d
+78
+56
+34
+12
+c4
+e2
+fd
+91
+04
+2d
+78
+56
+34
+12
+c4
+e2
+f9
+90
+44
+69
+0c
+c4
+e2
+fd
+91
+44
+69
+0c
+c4
+e2
+f9
+90
+44
+6d
+0c
+c4
+e2
+fd
+91
+44
+6d
+0c
+c4
+e2
+f9
+90
+84
+a9
+0c
+00
+00
+00
+c4
+e2
+fd
+91
+84
+a9
+0c
+00
+00
+00
+c4
+e2
+f9
+90
+84
+ad
+0c
+00
+00
+00
+c4
+e2
+fd
+91
+84
+ad
+0c
+00
+00
+00
+c4
+e2
+f9
+90
+84
+a9
+78
+56
+34
+12
+c4
+e2
+fd
+91
+84
+a9
+78
+56
+34
+12
+c4
+e2
+f9
+90
+84
+ad
+78
+56
+34
+12
+c4
+e2
+fd
+91
+84
+ad
+78
+56
+34
+12
+c4
+e2
+f9
+90
+44
+a9
+0c
+c4
+e2
+fd
+91
+44
+a9
+0c
+c4
+e2
+f9
+90
+44
+ad
+0c
+c4
+e2
+fd
+91
+44
+ad
+0c
+c4
+e2
+f9
+90
+04
+ed
+0c
+00
+00
+00
+c4
+e2
+fd
+91
+04
+ed
+0c
+00
+00
+00
+c4
+e2
+f9
+90
+04
+ed
+0c
+00
+00
+00
+c4
+e2
+fd
+91
+04
+ed
+0c
+00
+00
+00
+c4
+e2
+f9
+90
+04
+05
+00
+00
+00
+00
+c4
+e2
+fd
+91
+04
+05
+00
+00
+00
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+a2
+fd
+91
+04
+29
+c4
+82
+f9
+90
+44
+2d
+00
+c4
+c2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+a2
+fd
+91
+04
+29
+c4
+82
+f9
+90
+44
+2d
+00
+c4
+c2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+a2
+fd
+91
+04
+29
+c4
+82
+f9
+90
+44
+2d
+00
+c4
+c2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+29
+c4
+a2
+fd
+91
+04
+29
+c4
+82
+f9
+90
+44
+2d
+00
+c4
+c2
+fd
+91
+44
+2d
+00
+c4
+e2
+f9
+90
+04
+2d
+78
+56
+34
+12
+c4
+e2
+fd
+91
+04
+2d
+78
+56
+34
+12
+c4
+e2
+f9
+90
+44
+69
+0c
+c4
+a2
+fd
+91
+44
+69
+0c
+c4
+82
+f9
+90
+44
+6d
+0c
+c4
+c2
+fd
+91
+44
+6d
+0c
+c4
+e2
+f9
+90
+84
+a9
+0c
+00
+00
+00
+c4
+a2
+fd
+91
+84
+a9
+0c
+00
+00
+00
+c4
+82
+f9
+90
+84
+ad
+0c
+00
+00
+00
+c4
+c2
+fd
+91
+84
+ad
+0c
+00
+00
+00
+c4
+e2
+f9
+90
+84
+a9
+78
+56
+34
+12
+c4
+a2
+fd
+91
+84
+a9
+78
+56
+34
+12
+c4
+82
+f9
+90
+84
+ad
+78
+56
+34
+12
+c4
+c2
+fd
+91
+84
+ad
+78
+56
+34
+12
+c4
+e2
+f9
+90
+44
+a9
+0c
+c4
+a2
+fd
+91
+44
+a9
+0c
+c4
+82
+f9
+90
+44
+ad
+0c
+c4
+c2
+fd
+91
+44
+ad
+0c
+c4
+e2
+f9
+90
+04
+ed
+0c
+00
+00
+00
+c4
+a2
+fd
+91
+04
+ed
+0c
+00
+00
+00
+c4
+a2
+f9
+90
+04
+ed
+0c
+00
+00
+00
+c4
+e2
+fd
+91
+04
+ed
+0c
+00
+00
+00
diff --git a/modules/arch/x86/tests/vsib2-err.asm b/modules/arch/x86/tests/vsib2-err.asm
new file mode 100644
index 00000000..7e3ac55e
--- /dev/null
+++ b/modules/arch/x86/tests/vsib2-err.asm
@@ -0,0 +1,19 @@
+; Errors caught during EA checking
+
+[bits 32]
+vpgatherqq ymm0,[ymm0+ecx*2],ymm0
+
+[bits 64]
+addpd xmm0,[xmm0] ; not a VSIB128 template
+addpd xmm0,[ymm0] ; not a VSIB256 template
+
+[bits 32]
+vpgatherdq xmm0,[bp+xmm0],xmm0
+
+vpgatherdq xmm0,[xmm0+ymm0],xmm0
+
+vpgatherqq ymm0,[word ymm0],ymm0
+
+vpgatherqq ymm0,[byte ymm0],ymm0
+
+
diff --git a/modules/arch/x86/tests/vsib2-err.errwarn b/modules/arch/x86/tests/vsib2-err.errwarn
new file mode 100644
index 00000000..877ce62e
--- /dev/null
+++ b/modules/arch/x86/tests/vsib2-err.errwarn
@@ -0,0 +1,7 @@
+-:4: error: invalid effective address
+-:7: error: invalid effective address
+-:8: error: invalid effective address
+-:11: error: invalid effective address
+-:13: error: invalid effective address
+-:15: error: invalid effective address
+-:17: warning: invalid displacement size; fixed
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h
index 8e7caf82..c46ea5b8 100644
--- a/modules/arch/x86/x86arch.h
+++ b/modules/arch/x86/x86arch.h
@@ -177,6 +177,9 @@ int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3,
typedef struct x86_effaddr {
yasm_effaddr ea; /* base structure */
+ /* VSIB uses the normal SIB byte, but this flag enables it. */
+ unsigned char vsib_mode; /* 0 if not, 1 if XMM, 2 if YMM */
+
/* How the spare (register) bits in Mod/RM are handled:
* Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
* They're set in bytecode_create_insn().
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
index 49df4f8f..8666943e 100644
--- a/modules/arch/x86/x86bc.c
+++ b/modules/arch/x86/x86bc.c
@@ -191,6 +191,7 @@ ea_create(void)
x86_ea->ea.pc_rel = 0;
x86_ea->ea.not_pc_rel = 0;
x86_ea->ea.data_len = 0;
+ x86_ea->vsib_mode = 0;
x86_ea->modrm = 0;
x86_ea->valid_modrm = 0;
x86_ea->need_modrm = 0;
@@ -382,6 +383,8 @@ yasm_x86__ea_print(const yasm_effaddr *ea, FILE *f, int indent_level)
fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit);
fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "",
(unsigned int)x86_ea->ea.segreg);
+ fprintf(f, "%*sVSIBMode=%u\n", indent_level, "",
+ (unsigned int)x86_ea->vsib_mode);
fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "",
(unsigned int)x86_ea->modrm, (unsigned int)x86_ea->valid_modrm,
(unsigned int)x86_ea->need_modrm);
diff --git a/modules/arch/x86/x86expr.c b/modules/arch/x86/x86expr.c
index 814a9b2a..cacf827a 100644
--- a/modules/arch/x86/x86expr.c
+++ b/modules/arch/x86/x86expr.c
@@ -34,6 +34,7 @@
typedef struct x86_checkea_reg3264_data {
int *regs; /* total multiplier for each reg */
+ unsigned char vsib_mode;
unsigned char bits;
unsigned char addrsize;
} x86_checkea_reg3264_data;
@@ -58,6 +59,20 @@ x86_expr_checkea_get_reg3264(yasm_expr__item *ei, int *regnum,
return 0;
*regnum = (unsigned int)(ei->data.reg & 0xF);
break;
+ case X86_XMMREG:
+ if (data->vsib_mode != 1)
+ return 0;
+ if (data->bits != 64 && (ei->data.reg & 0x8) == 0x8)
+ return 0;
+ *regnum = 17+(unsigned int)(ei->data.reg & 0xF);
+ break;
+ case X86_YMMREG:
+ if (data->vsib_mode != 2)
+ return 0;
+ if (data->bits != 64 && (ei->data.reg & 0x8) == 0x8)
+ return 0;
+ *regnum = 17+(unsigned int)(ei->data.reg & 0xF);
+ break;
case X86_RIP:
if (data->bits != 64)
return 0;
@@ -606,6 +621,11 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
}
/*@fallthrough@*/
default:
+ /* If SIB is required, but we're in 16-bit mode, set to 32. */
+ if (bits == 16 && x86_ea->need_sib == 1) {
+ *addrsize = 32;
+ break;
+ }
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
@@ -643,13 +663,19 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
REG64_R13,
REG64_R14,
REG64_R15,
- REG64_RIP
+ REG64_RIP,
+ SIMDREGS
} reg3264type;
- int reg3264mult[17] = {0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ int reg3264mult[33] =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
x86_checkea_reg3264_data reg3264_data;
int basereg = REG3264_NONE; /* "base" register (for SIB) */
int indexreg = REG3264_NONE; /* "index" register (for SIB) */
+ int regcount = 17; /* normally don't check SIMD regs */
+
+ if (x86_ea->vsib_mode != 0)
+ regcount = 33;
/* We can only do 64-bit addresses in 64-bit mode. */
if (*addrsize == 64 && bits != 64) {
@@ -665,6 +691,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
}
reg3264_data.regs = reg3264mult;
+ reg3264_data.vsib_mode = x86_ea->vsib_mode;
reg3264_data.bits = bits;
reg3264_data.addrsize = *addrsize;
if (x86_ea->ea.disp.abs) {
@@ -698,7 +725,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
* Also, if an indexreg hasn't been assigned, try to find one.
* Meanwhile, check to make sure there's no negative register mults.
*/
- for (i=0; i<17; i++) {
+ for (i=0; i<regcount; i++) {
if (reg3264mult[i] < 0) {
yasm_error_set(YASM_ERROR_VALUE,
N_("invalid effective address"));
@@ -711,10 +738,26 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
indexreg = i;
}
- /* Handle certain special cases of indexreg mults when basereg is
- * empty.
- */
- if (indexreg != REG3264_NONE && basereg == REG3264_NONE)
+ if (x86_ea->vsib_mode != 0) {
+ /* For VSIB, the SIMD register needs to go into the indexreg.
+ * Also check basereg (must be a GPR if present) and indexreg
+ * (must be a SIMD register).
+ */
+ if (basereg >= SIMDREGS &&
+ (indexreg == REG3264_NONE || reg3264mult[indexreg] == 1)) {
+ int temp = basereg;
+ basereg = indexreg;
+ indexreg = temp;
+ }
+ if (basereg >= REG64_RIP || indexreg < SIMDREGS) {
+ yasm_error_set(YASM_ERROR_VALUE,
+ N_("invalid effective address"));
+ return 1;
+ }
+ } else if (indexreg != REG3264_NONE && basereg == REG3264_NONE)
+ /* Handle certain special cases of indexreg mults when basereg is
+ * empty.
+ */
switch (reg3264mult[indexreg]) {
case 1:
/* Only optimize this way if nosplit wasn't specified */
@@ -741,7 +784,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
/* Make sure there's no other registers than the basereg and indexreg
* we just found.
*/
- for (i=0; i<17; i++)
+ for (i=0; i<regcount; i++)
if (i != basereg && i != indexreg && reg3264mult[i] != 0) {
yasm_error_set(YASM_ERROR_VALUE,
N_("invalid effective address"));
@@ -861,10 +904,17 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize,
x86_ea->sib |= 040;
/* Any scale field is valid, just leave at 0. */
else {
- if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int)
- (X86_REG64 | indexreg), bits,
- X86_REX_X))
- return 1;
+ if (indexreg >= SIMDREGS) {
+ if (yasm_x86__set_rex_from_reg(rex, &low3,
+ (unsigned int)(X86_XMMREG | (indexreg-SIMDREGS)),
+ bits, X86_REX_X))
+ return 1;
+ } else {
+ if (yasm_x86__set_rex_from_reg(rex, &low3,
+ (unsigned int)(X86_REG64 | indexreg),
+ bits, X86_REX_X))
+ return 1;
+ }
x86_ea->sib |= low3 << 3;
/* Set scale field, 1 case -> 0, so don't bother. */
switch (reg3264mult[indexreg]) {
diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c
index c734d335..0e8a5be9 100644
--- a/modules/arch/x86/x86id.c
+++ b/modules/arch/x86/x86id.c
@@ -111,7 +111,11 @@ enum x86_operand_type {
*/
OPT_MemrAX = 25,
/* EAX memory operand only (EA) [special case for SVM skinit opcode] */
- OPT_MemEAX = 26
+ OPT_MemEAX = 26,
+ /* XMM VSIB memory operand */
+ OPT_MemXMMIndex = 27,
+ /* YMM VSIB memory operand */
+ OPT_MemYMMIndex = 28
};
enum x86_operand_size {
@@ -347,6 +351,37 @@ static const yasm_bytecode_callback x86_id_insn_callback = {
#include "x86insns.c"
+/* Expression-leaf callback used for VSIB operand matching: reports a match
+ * (returns 1) for a register leaf of the requested SIMD class, else returns 0.
+ * d points to an int: zero = want XMM, nonzero = want YMM.  Full legality checking is performed in EA code. */
+static int
+x86_expr_contains_simd_cb(const yasm_expr__item *ei, void *d)
+{
+ int ymm = *((int *)d); /* nonzero: look for a YMM register; zero: look for XMM */
+ if (ei->type != YASM_EXPR_REG) /* only register leaves can match */
+ return 0;
+ switch ((x86_expritem_reg_size)(ei->data.reg & ~0xFUL)) { /* clear the low 4 bits, leaving the x86_expritem_reg_size part */
+ case X86_XMMREG:
+ if (!ymm)
+ return 1;
+ break;
+ case X86_YMMREG:
+ if (ymm)
+ return 1;
+ break;
+ default: /* any other register kind does not match */
+ break;
+ }
+ return 0;
+}
+
+static int
+x86_expr_contains_simd(const yasm_expr *e, int ymm) /* ymm: 0 = search for XMM, nonzero = search for YMM */
+{
+ return yasm_expr__traverse_leaves_in_const(e, &ymm, /* &ymm is handed to the callback as d */
+ x86_expr_contains_simd_cb); /* NOTE(review): presumably nonzero as soon as the callback matches -- confirm in expr code */
+}
+
static void
x86_finalize_common(x86_common *common, const x86_insn_info *info,
unsigned int mode_bits)
@@ -851,6 +886,16 @@ x86_find_match(x86_id_insn *id_insn, yasm_insn_operand **ops,
mismatch = 1;
break;
}
+ case OPT_MemXMMIndex:
+ if (op->type != YASM_INSN__OPERAND_MEMORY ||
+ !x86_expr_contains_simd(op->data.ea->disp.abs, 0))
+ mismatch = 1;
+ break;
+ case OPT_MemYMMIndex:
+ if (op->type != YASM_INSN__OPERAND_MEMORY ||
+ !x86_expr_contains_simd(op->data.ea->disp.abs, 1))
+ mismatch = 1;
+ break;
default:
yasm_internal_error(N_("invalid operand type"));
}
@@ -1231,12 +1276,20 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc)
if (info_ops[i].type == OPT_MemOffs)
/* Special-case for MOV MemOffs instruction */
yasm_x86__ea_set_disponly(insn->x86_ea);
- else if (id_insn->default_rel &&
- !op->data.ea->not_pc_rel &&
- op->data.ea->segreg != 0x6404 &&
- op->data.ea->segreg != 0x6505 &&
- !yasm_expr__contains(
- op->data.ea->disp.abs, YASM_EXPR_REG))
+ else if (info_ops[i].type == OPT_MemXMMIndex) {
+ /* Remember VSIB mode */
+ insn->x86_ea->vsib_mode = 1;
+ insn->x86_ea->need_sib = 1;
+ } else if (info_ops[i].type == OPT_MemYMMIndex) {
+ /* Remember VSIB mode */
+ insn->x86_ea->vsib_mode = 2;
+ insn->x86_ea->need_sib = 1;
+ } else if (id_insn->default_rel &&
+ !op->data.ea->not_pc_rel &&
+ op->data.ea->segreg != 0x6404 &&
+ op->data.ea->segreg != 0x6505 &&
+ !yasm_expr__contains(
+ op->data.ea->disp.abs, YASM_EXPR_REG))
/* Enable default PC-rel if no regs and segreg
* is not FS or GS.
*/