diff options
author | Peter Johnson <peter@tortall.net> | 2011-07-03 19:44:42 -0700 |
---|---|---|
committer | Peter Johnson <peter@tortall.net> | 2011-07-03 19:54:45 -0700 |
commit | ab721f347d281a430b4fe19cb2025151bcb7ef8a (patch) | |
tree | df79b39bd871be9c4591e70087ed514263fdf462 | |
parent | d779fcb04e7b47b6054483a498ec3ad77428bb24 (diff) | |
download | yasm-ab721f347d281a430b4fe19cb2025151bcb7ef8a.tar.gz |
Add AVX2 VGATHER* and VPGATHER* instructions.
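These instructions use "VSIB" encoding, which takes the place of the
usual SIB encoding: the SIB byte layout is unchanged, but the index
field selects an XMM or YMM vector register instead of a general-purpose
register (the base, if present, is still a general-purpose register).
Several tests cover various legal and illegal addressing modes.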
Last part of [#227 state:resolved].
-rwxr-xr-x | modules/arch/x86/gen_x86_insn.py | 93 | ||||
-rw-r--r-- | modules/arch/x86/tests/Makefile.inc | 6 | ||||
-rw-r--r-- | modules/arch/x86/tests/avx2.asm | 40 | ||||
-rw-r--r-- | modules/arch/x86/tests/avx2.hex | 192 | ||||
-rw-r--r-- | modules/arch/x86/tests/vsib-err.asm | 14 | ||||
-rw-r--r-- | modules/arch/x86/tests/vsib-err.errwarn | 7 | ||||
-rw-r--r-- | modules/arch/x86/tests/vsib.asm | 114 | ||||
-rw-r--r-- | modules/arch/x86/tests/vsib.hex | 662 | ||||
-rw-r--r-- | modules/arch/x86/tests/vsib2-err.asm | 19 | ||||
-rw-r--r-- | modules/arch/x86/tests/vsib2-err.errwarn | 7 | ||||
-rw-r--r-- | modules/arch/x86/x86arch.h | 3 | ||||
-rw-r--r-- | modules/arch/x86/x86bc.c | 3 | ||||
-rw-r--r-- | modules/arch/x86/x86expr.c | 76 | ||||
-rw-r--r-- | modules/arch/x86/x86id.c | 67 |
14 files changed, 1283 insertions, 20 deletions
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py index 5dec9e81..985b3733 100755 --- a/modules/arch/x86/gen_x86_insn.py +++ b/modules/arch/x86/gen_x86_insn.py @@ -7027,6 +7027,99 @@ for sz in [128, 256]: add_insn("vpblendd", "vex_66_0F3A_imm8_avx2", modifiers=[0x02]) +# Vector register in EA. +add_group("gather_32x_32x", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=128, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="VEX")]) +add_group("gather_32x_32x", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=256, dest="VEX")]) +add_insn("vgatherdpd", "gather_32x_32x", modifiers=[0x92]) +add_insn("vpgatherdq", "gather_32x_32x", modifiers=[0x90]) + +add_group("gather_64x_64y", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=128, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="VEX")]) +add_group("gather_64x_64y", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + vexw=1, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="MemYMMIndex", size=64, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=256, dest="VEX")]) +add_insn("vgatherqpd", "gather_64x_64y", modifiers=[0x93]) +add_insn("vpgatherqq", "gather_64x_64y", modifiers=[0x91]) + +add_group("gather_32x_32y", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=128, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, 
dest="Spare"), + Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="VEX")]) +add_group("gather_32x_32y", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=256, dest="Spare"), + Operand(type="MemYMMIndex", size=32, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=256, dest="VEX")]) +add_insn("vgatherdps", "gather_32x_32y", modifiers=[0x92]) +add_insn("vpgatherdd", "gather_32x_32y", modifiers=[0x90]) + +add_group("gather_64x_64y_128", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=128, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="VEX")]) +add_group("gather_64x_64y_128", + cpu=["AVX2"], + modifiers=["Op2Add"], + vex=256, + vexw=0, + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="MemYMMIndex", size=64, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="VEX")]) +add_insn("vgatherqps", "gather_64x_64y_128", modifiers=[0x93]) +add_insn("vpgatherqd", "gather_64x_64y_128", modifiers=[0x91]) + ##################################################################### # Intel FMA instructions ##################################################################### diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc index 7aebd992..07b13a80 100644 --- a/modules/arch/x86/tests/Makefile.inc +++ b/modules/arch/x86/tests/Makefile.inc @@ -237,6 +237,12 @@ EXTRA_DIST += modules/arch/x86/tests/vmx.asm EXTRA_DIST += modules/arch/x86/tests/vmx.hex EXTRA_DIST += modules/arch/x86/tests/vmx-err.asm EXTRA_DIST += modules/arch/x86/tests/vmx-err.errwarn +EXTRA_DIST += modules/arch/x86/tests/vsib.asm +EXTRA_DIST += modules/arch/x86/tests/vsib.hex 
+EXTRA_DIST += modules/arch/x86/tests/vsib-err.asm +EXTRA_DIST += modules/arch/x86/tests/vsib-err.errwarn +EXTRA_DIST += modules/arch/x86/tests/vsib2-err.asm +EXTRA_DIST += modules/arch/x86/tests/vsib2-err.errwarn EXTRA_DIST += modules/arch/x86/tests/x86label.asm EXTRA_DIST += modules/arch/x86/tests/x86label.hex EXTRA_DIST += modules/arch/x86/tests/xchg64.asm diff --git a/modules/arch/x86/tests/avx2.asm b/modules/arch/x86/tests/avx2.asm index acf17ade..2806d23c 100644 --- a/modules/arch/x86/tests/avx2.asm +++ b/modules/arch/x86/tests/avx2.asm @@ -610,3 +610,43 @@ vpsrlvq xmm1, xmm2, xmm3 ; c4 e2 e9 45 cb vpsrlvq xmm1, xmm2, oword [rax] ; c4 e2 e9 45 08 vpsrlvq ymm1, ymm2, ymm3 ; c4 e2 ed 45 cb vpsrlvq ymm1, ymm2, yword [rax] ; c4 e2 ed 45 08 + +vgatherdpd xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 92 0c 08 +vgatherdpd xmm1, dword [rax+xmm1], xmm2 ; c4 e2 e9 92 0c 08 +vgatherdpd ymm1, [rax+xmm1], ymm2 ; c4 e2 ed 92 0c 08 +vgatherdpd ymm1, dword [rax+xmm1], ymm2 ; c4 e2 ed 92 0c 08 + +vgatherqpd xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 93 0c 08 +vgatherqpd xmm1, qword [rax+xmm1], xmm2 ; c4 e2 e9 93 0c 08 +vgatherqpd ymm1, [rax+ymm1], ymm2 ; c4 e2 ed 93 0c 08 +vgatherqpd ymm1, qword [rax+ymm1], ymm2 ; c4 e2 ed 93 0c 08 + +vgatherdps xmm1, [rax+xmm1], xmm2 ; c4 e2 69 92 0c 08 +vgatherdps xmm1, dword [rax+xmm1], xmm2 ; c4 e2 69 92 0c 08 +vgatherdps ymm1, [rax+ymm1], ymm2 ; c4 e2 6d 92 0c 08 +vgatherdps ymm1, dword [rax+ymm1], ymm2 ; c4 e2 6d 92 0c 08 + +vgatherqps xmm1, [rax+xmm1], xmm2 ; c4 e2 69 93 0c 08 +vgatherqps xmm1, qword [rax+xmm1], xmm2 ; c4 e2 69 93 0c 08 +vgatherqps xmm1, [rax+ymm1], xmm2 ; c4 e2 6d 93 0c 08 +vgatherqps xmm1, qword [rax+ymm1], xmm2 ; c4 e2 6d 93 0c 08 + +vpgatherdd xmm1, [rax+xmm1], xmm2 ; c4 e2 69 90 0c 08 +vpgatherdd xmm1, dword [rax+xmm1], xmm2 ; c4 e2 69 90 0c 08 +vpgatherdd ymm1, [rax+ymm1], ymm2 ; c4 e2 6d 90 0c 08 +vpgatherdd ymm1, dword [rax+ymm1], ymm2 ; c4 e2 6d 90 0c 08 + +vpgatherqd xmm1, [rax+xmm1], xmm2 ; c4 e2 69 91 0c 08 +vpgatherqd xmm1, 
qword [rax+xmm1], xmm2 ; c4 e2 69 91 0c 08 +vpgatherqd xmm1, [rax+ymm1], xmm2 ; c4 e2 6d 91 0c 08 +vpgatherqd xmm1, qword [rax+ymm1], xmm2 ; c4 e2 6d 91 0c 08 + +vpgatherdq xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 90 0c 08 +vpgatherdq xmm1, dword [rax+xmm1], xmm2 ; c4 e2 e9 90 0c 08 +vpgatherdq ymm1, [rax+xmm1], ymm2 ; c4 e2 ed 90 0c 08 +vpgatherdq ymm1, dword [rax+xmm1], ymm2 ; c4 e2 ed 90 0c 08 + +vpgatherqq xmm1, [rax+xmm1], xmm2 ; c4 e2 e9 91 0c 08 +vpgatherqq xmm1, qword [rax+xmm1], xmm2 ; c4 e2 e9 91 0c 08 +vpgatherqq ymm1, [rax+ymm1], ymm2 ; c4 e2 ed 91 0c 08 +vpgatherqq ymm1, qword [rax+ymm1], ymm2 ; c4 e2 ed 91 0c 08 diff --git a/modules/arch/x86/tests/avx2.hex b/modules/arch/x86/tests/avx2.hex index 3d9e9cd4..17f84bd3 100644 --- a/modules/arch/x86/tests/avx2.hex +++ b/modules/arch/x86/tests/avx2.hex @@ -2103,3 +2103,195 @@ e2 ed 45 08 +c4 +e2 +e9 +92 +0c +08 +c4 +e2 +e9 +92 +0c +08 +c4 +e2 +ed +92 +0c +08 +c4 +e2 +ed +92 +0c +08 +c4 +e2 +e9 +93 +0c +08 +c4 +e2 +e9 +93 +0c +08 +c4 +e2 +ed +93 +0c +08 +c4 +e2 +ed +93 +0c +08 +c4 +e2 +69 +92 +0c +08 +c4 +e2 +69 +92 +0c +08 +c4 +e2 +6d +92 +0c +08 +c4 +e2 +6d +92 +0c +08 +c4 +e2 +69 +93 +0c +08 +c4 +e2 +69 +93 +0c +08 +c4 +e2 +6d +93 +0c +08 +c4 +e2 +6d +93 +0c +08 +c4 +e2 +69 +90 +0c +08 +c4 +e2 +69 +90 +0c +08 +c4 +e2 +6d +90 +0c +08 +c4 +e2 +6d +90 +0c +08 +c4 +e2 +69 +91 +0c +08 +c4 +e2 +69 +91 +0c +08 +c4 +e2 +6d +91 +0c +08 +c4 +e2 +6d +91 +0c +08 +c4 +e2 +e9 +90 +0c +08 +c4 +e2 +e9 +90 +0c +08 +c4 +e2 +ed +90 +0c +08 +c4 +e2 +ed +90 +0c +08 +c4 +e2 +e9 +91 +0c +08 +c4 +e2 +e9 +91 +0c +08 +c4 +e2 +ed +91 +0c +08 +c4 +e2 +ed +91 +0c +08 diff --git a/modules/arch/x86/tests/vsib-err.asm b/modules/arch/x86/tests/vsib-err.asm new file mode 100644 index 00000000..ee306038 --- /dev/null +++ b/modules/arch/x86/tests/vsib-err.asm @@ -0,0 +1,14 @@ +; Errors caught during instruction matching + +[bits 64] + +vpgatherdq xmm0,xmm0,xmm0 ; no reg EA template + +vpgatherdq xmm0,[ymm0],xmm0 ; not a VSIB256 template +vpgatherqq 
ymm0,[xmm0],ymm0 ; not a VSIB128 template + +vpgatherdq xmm0,[rel 0],xmm0 +vpgatherdq xmm0,[0],xmm0 +vpgatherdq xmm0,[rax],xmm0 +vpgatherdq xmm0,[rax+rbx],xmm0 + diff --git a/modules/arch/x86/tests/vsib-err.errwarn b/modules/arch/x86/tests/vsib-err.errwarn new file mode 100644 index 00000000..8d2d084f --- /dev/null +++ b/modules/arch/x86/tests/vsib-err.errwarn @@ -0,0 +1,7 @@ +-:5: error: invalid combination of opcode and operands +-:7: error: invalid combination of opcode and operands +-:8: error: invalid combination of opcode and operands +-:10: error: invalid combination of opcode and operands +-:11: error: invalid combination of opcode and operands +-:12: error: invalid combination of opcode and operands +-:13: error: invalid combination of opcode and operands diff --git a/modules/arch/x86/tests/vsib.asm b/modules/arch/x86/tests/vsib.asm new file mode 100644 index 00000000..fc2a705b --- /dev/null +++ b/modules/arch/x86/tests/vsib.asm @@ -0,0 +1,114 @@ +[bits 16] +; test promotion to 32-bit address size +vpgatherdq xmm0,[xmm0],xmm0 ; 67 c4 e2 f9 90 04 05 00 00 00 00 +vpgatherqq ymm0,[ymm0],ymm0 ; 67 c4 e2 fd 91 04 05 00 00 00 00 + +[bits 32] +; test promotion from base to index +vpgatherdq xmm0,[xmm0],xmm0 ; c4 e2 f9 90 04 05 00 00 00 00 +vpgatherqq ymm0,[ymm0],ymm0 ; c4 e2 fd 91 04 05 00 00 00 00 + +; various combinations +vpgatherdq xmm0,[ecx+xmm5],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[ecx+ymm5],ymm0 ; c4 e2 fd 91 04 29 +vpgatherdq xmm0,[ebp+xmm5],xmm0 ; c4 e2 f9 90 44 2d 00 +vpgatherqq ymm0,[ebp+ymm5],ymm0 ; c4 e2 fd 91 44 2d 00 + +vpgatherdq xmm0,[xmm5+ecx],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[ymm5+ecx],ymm0 ; c4 e2 fd 91 04 29 +vpgatherdq xmm0,[xmm5+ebp],xmm0 ; c4 e2 f9 90 44 2d 00 +vpgatherqq ymm0,[ymm5+ebp],ymm0 ; c4 e2 fd 91 44 2d 00 + +vpgatherdq xmm0,[ecx+xmm5*1],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[ecx+ymm5*1],ymm0 ; c4 e2 fd 91 04 29 +vpgatherdq xmm0,[ebp+xmm5*1],xmm0 ; c4 e2 f9 90 44 2d 00 +vpgatherqq ymm0,[ebp+ymm5*1],ymm0 ; c4 e2 
fd 91 44 2d 00 + +vpgatherdq xmm0,[xmm5+ecx*1],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[ymm5+ecx*1],ymm0 ; c4 e2 fd 91 04 29 +vpgatherdq xmm0,[xmm5+ebp*1],xmm0 ; c4 e2 f9 90 44 2d 00 +vpgatherqq ymm0,[ymm5+ebp*1],ymm0 ; c4 e2 fd 91 44 2d 00 + +vpgatherdq xmm0,[nosplit 12345678h + xmm5*1],xmm0; c4 e2 f9 90 04 2d 78 56 34 12 +vpgatherqq ymm0,[nosplit 12345678h + ymm5*1],ymm0; c4 e2 fd 91 04 2d 78 56 34 12 + +vpgatherdq xmm0,[byte ecx + 12 + xmm5*2],xmm0 ; c4 e2 f9 90 44 69 0c +vpgatherqq ymm0,[byte ecx + 12 + ymm5*2],ymm0 ; c4 e2 fd 91 44 69 0c +vpgatherdq xmm0,[byte ebp + 12 + xmm5*2],xmm0 ; c4 e2 f9 90 44 6d 0c +vpgatherqq ymm0,[byte ebp + 12 + ymm5*2],ymm0 ; c4 e2 fd 91 44 6d 0c + +vpgatherdq xmm0,[dword ecx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 0c 00 00 00 +vpgatherqq ymm0,[dword ecx + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 84 a9 0c 00 00 00 +vpgatherdq xmm0,[dword ebp + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 84 ad 0c 00 00 00 +vpgatherqq ymm0,[dword ebp + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 84 ad 0c 00 00 00 + +vpgatherdq xmm0,[ecx + 12345678h + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 78 56 34 12 +vpgatherqq ymm0,[ecx + 12345678h + ymm5*4],ymm0 ; c4 e2 fd 91 84 a9 78 56 34 12 +vpgatherdq xmm0,[ebp + 12345678h + xmm5*4],xmm0 ; c4 e2 f9 90 84 ad 78 56 34 12 +vpgatherqq ymm0,[ebp + 12345678h + ymm5*4],ymm0 ; c4 e2 fd 91 84 ad 78 56 34 12 + +vpgatherdq xmm0,[ecx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 44 a9 0c +vpgatherqq ymm0,[ecx + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 44 a9 0c +vpgatherdq xmm0,[ebp + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 44 ad 0c +vpgatherqq ymm0,[ebp + 12 + ymm5*4],ymm0 ; c4 e2 fd 91 44 ad 0c + +vpgatherdq xmm0,[dword 12 + xmm5*8],xmm0 ; c4 e2 f9 90 04 ed 0c 00 00 00 +vpgatherqq ymm0,[dword 12 + ymm5*8],ymm0 ; c4 e2 fd 91 04 ed 0c 00 00 00 +vpgatherdq xmm0,[12 + xmm5*8],xmm0 ; c4 e2 f9 90 04 ed 0c 00 00 00 +vpgatherqq ymm0,[12 + ymm5*8],ymm0 ; c4 e2 fd 91 04 ed 0c 00 00 00 + +[bits 64] +; test promotion from base to index +vpgatherdq xmm0,[xmm0],xmm0 ; c4 e2 f9 90 04 05 00 00 00 00 
+vpgatherqq ymm0,[ymm0],ymm0 ; c4 e2 fd 91 04 05 00 00 00 00 + +; various combinations +vpgatherdq xmm0,[rcx+xmm5],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[rcx+ymm13],ymm0 ; c4 a2 fd 91 04 29 +vpgatherdq xmm0,[r13+xmm13],xmm0 ; c4 82 f9 90 44 2d 00 +vpgatherqq ymm0,[r13+ymm5],ymm0 ; c4 c2 fd 91 44 2d 00 + +vpgatherdq xmm0,[xmm5+rcx],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[ymm13+rcx],ymm0 ; c4 a2 fd 91 04 29 +vpgatherdq xmm0,[xmm13+r13],xmm0 ; c4 82 f9 90 44 2d 00 +vpgatherqq ymm0,[ymm5+r13],ymm0 ; c4 c2 fd 91 44 2d 00 + +vpgatherdq xmm0,[rcx+xmm5*1],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[rcx+ymm13*1],ymm0 ; c4 a2 fd 91 04 29 +vpgatherdq xmm0,[r13+xmm13*1],xmm0 ; c4 82 f9 90 44 2d 00 +vpgatherqq ymm0,[r13+ymm5*1],ymm0 ; c4 c2 fd 91 44 2d 00 + +vpgatherdq xmm0,[xmm5+rcx*1],xmm0 ; c4 e2 f9 90 04 29 +vpgatherqq ymm0,[ymm13+rcx*1],ymm0 ; c4 a2 fd 91 04 29 +vpgatherdq xmm0,[xmm13+r13*1],xmm0 ; c4 82 f9 90 44 2d 00 +vpgatherqq ymm0,[ymm5+r13*1],ymm0 ; c4 c2 fd 91 44 2d 00 + +vpgatherdq xmm0,[nosplit 12345678h + xmm5*1],xmm0; c4 e2 f9 90 04 2d 78 56 34 12 +vpgatherqq ymm0,[nosplit 12345678h + ymm5*1],ymm0; c4 e2 fd 91 04 2d 78 56 34 12 + +vpgatherdq xmm0,[byte rcx + 12 + xmm5*2],xmm0 ; c4 e2 f9 90 44 69 0c +vpgatherqq ymm0,[byte rcx + 12 + ymm13*2],ymm0 ; c4 a2 fd 91 44 69 0c +vpgatherdq xmm0,[byte r13 + 12 + xmm13*2],xmm0 ; c4 82 f9 90 44 6d 0c +vpgatherqq ymm0,[byte r13 + 12 + ymm5*2],ymm0 ; c4 c2 fd 91 44 6d 0c + +vpgatherdq xmm0,[dword rcx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 0c 00 00 00 +vpgatherqq ymm0,[dword rcx + 12 + ymm13*4],ymm0 ; c4 a2 fd 91 84 a9 0c 00 00 00 +vpgatherdq xmm0,[dword r13 + 12 + xmm13*4],xmm0 ; c4 82 f9 90 84 ad 0c 00 00 00 +vpgatherqq ymm0,[dword r13 + 12 + ymm5*4],ymm0 ; c4 c2 fd 91 84 ad 0c 00 00 00 + +vpgatherdq xmm0,[rcx + 12345678h + xmm5*4],xmm0 ; c4 e2 f9 90 84 a9 78 56 34 12 +vpgatherqq ymm0,[rcx + 12345678h + ymm13*4],ymm0; c4 a2 fd 91 84 a9 78 56 34 12 +vpgatherdq xmm0,[r13 + 12345678h + xmm13*4],xmm0; c4 82 f9 90 84 ad 78 
56 34 12 +vpgatherqq ymm0,[r13 + 12345678h + ymm5*4],ymm0 ; c4 c2 fd 91 84 ad 78 56 34 12 + +vpgatherdq xmm0,[rcx + 12 + xmm5*4],xmm0 ; c4 e2 f9 90 44 a9 0c +vpgatherqq ymm0,[rcx + 12 + ymm13*4],ymm0 ; c4 a2 fd 91 44 a9 0c +vpgatherdq xmm0,[r13 + 12 + xmm13*4],xmm0 ; c4 82 f9 90 44 ad 0c +vpgatherqq ymm0,[r13 + 12 + ymm5*4],ymm0 ; c4 c2 fd 91 44 ad 0c + +vpgatherdq xmm0,[dword 12 + xmm5*8],xmm0 ; c4 e2 f9 90 04 ed 0c 00 00 00 +vpgatherqq ymm0,[dword 12 + ymm13*8],ymm0 ; c4 a2 fd 91 04 ed 0c 00 00 00 +vpgatherdq xmm0,[12 + xmm13*8],xmm0 ; c4 a2 f9 90 04 ed 0c 00 00 00 +vpgatherqq ymm0,[12 + ymm5*8],ymm0 ; c4 e2 fd 91 04 ed 0c 00 00 00 + + diff --git a/modules/arch/x86/tests/vsib.hex b/modules/arch/x86/tests/vsib.hex new file mode 100644 index 00000000..7dfde886 --- /dev/null +++ b/modules/arch/x86/tests/vsib.hex @@ -0,0 +1,662 @@ +67 +c4 +e2 +f9 +90 +04 +05 +00 +00 +00 +00 +67 +c4 +e2 +fd +91 +04 +05 +00 +00 +00 +00 +c4 +e2 +f9 +90 +04 +05 +00 +00 +00 +00 +c4 +e2 +fd +91 +04 +05 +00 +00 +00 +00 +c4 +e2 +f9 +90 +04 +29 +c4 +e2 +fd +91 +04 +29 +c4 +e2 +f9 +90 +44 +2d +00 +c4 +e2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +29 +c4 +e2 +fd +91 +04 +29 +c4 +e2 +f9 +90 +44 +2d +00 +c4 +e2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +29 +c4 +e2 +fd +91 +04 +29 +c4 +e2 +f9 +90 +44 +2d +00 +c4 +e2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +29 +c4 +e2 +fd +91 +04 +29 +c4 +e2 +f9 +90 +44 +2d +00 +c4 +e2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +2d +78 +56 +34 +12 +c4 +e2 +fd +91 +04 +2d +78 +56 +34 +12 +c4 +e2 +f9 +90 +44 +69 +0c +c4 +e2 +fd +91 +44 +69 +0c +c4 +e2 +f9 +90 +44 +6d +0c +c4 +e2 +fd +91 +44 +6d +0c +c4 +e2 +f9 +90 +84 +a9 +0c +00 +00 +00 +c4 +e2 +fd +91 +84 +a9 +0c +00 +00 +00 +c4 +e2 +f9 +90 +84 +ad +0c +00 +00 +00 +c4 +e2 +fd +91 +84 +ad +0c +00 +00 +00 +c4 +e2 +f9 +90 +84 +a9 +78 +56 +34 +12 +c4 +e2 +fd +91 +84 +a9 +78 +56 +34 +12 +c4 +e2 +f9 +90 +84 +ad +78 +56 +34 +12 +c4 +e2 +fd +91 +84 +ad +78 +56 +34 +12 +c4 +e2 +f9 +90 +44 +a9 +0c +c4 +e2 +fd +91 +44 +a9 +0c +c4 
+e2 +f9 +90 +44 +ad +0c +c4 +e2 +fd +91 +44 +ad +0c +c4 +e2 +f9 +90 +04 +ed +0c +00 +00 +00 +c4 +e2 +fd +91 +04 +ed +0c +00 +00 +00 +c4 +e2 +f9 +90 +04 +ed +0c +00 +00 +00 +c4 +e2 +fd +91 +04 +ed +0c +00 +00 +00 +c4 +e2 +f9 +90 +04 +05 +00 +00 +00 +00 +c4 +e2 +fd +91 +04 +05 +00 +00 +00 +00 +c4 +e2 +f9 +90 +04 +29 +c4 +a2 +fd +91 +04 +29 +c4 +82 +f9 +90 +44 +2d +00 +c4 +c2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +29 +c4 +a2 +fd +91 +04 +29 +c4 +82 +f9 +90 +44 +2d +00 +c4 +c2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +29 +c4 +a2 +fd +91 +04 +29 +c4 +82 +f9 +90 +44 +2d +00 +c4 +c2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +29 +c4 +a2 +fd +91 +04 +29 +c4 +82 +f9 +90 +44 +2d +00 +c4 +c2 +fd +91 +44 +2d +00 +c4 +e2 +f9 +90 +04 +2d +78 +56 +34 +12 +c4 +e2 +fd +91 +04 +2d +78 +56 +34 +12 +c4 +e2 +f9 +90 +44 +69 +0c +c4 +a2 +fd +91 +44 +69 +0c +c4 +82 +f9 +90 +44 +6d +0c +c4 +c2 +fd +91 +44 +6d +0c +c4 +e2 +f9 +90 +84 +a9 +0c +00 +00 +00 +c4 +a2 +fd +91 +84 +a9 +0c +00 +00 +00 +c4 +82 +f9 +90 +84 +ad +0c +00 +00 +00 +c4 +c2 +fd +91 +84 +ad +0c +00 +00 +00 +c4 +e2 +f9 +90 +84 +a9 +78 +56 +34 +12 +c4 +a2 +fd +91 +84 +a9 +78 +56 +34 +12 +c4 +82 +f9 +90 +84 +ad +78 +56 +34 +12 +c4 +c2 +fd +91 +84 +ad +78 +56 +34 +12 +c4 +e2 +f9 +90 +44 +a9 +0c +c4 +a2 +fd +91 +44 +a9 +0c +c4 +82 +f9 +90 +44 +ad +0c +c4 +c2 +fd +91 +44 +ad +0c +c4 +e2 +f9 +90 +04 +ed +0c +00 +00 +00 +c4 +a2 +fd +91 +04 +ed +0c +00 +00 +00 +c4 +a2 +f9 +90 +04 +ed +0c +00 +00 +00 +c4 +e2 +fd +91 +04 +ed +0c +00 +00 +00 diff --git a/modules/arch/x86/tests/vsib2-err.asm b/modules/arch/x86/tests/vsib2-err.asm new file mode 100644 index 00000000..7e3ac55e --- /dev/null +++ b/modules/arch/x86/tests/vsib2-err.asm @@ -0,0 +1,19 @@ +; Errors caught during EA checking + +[bits 32] +vpgatherqq ymm0,[ymm0+ecx*2],ymm0 + +[bits 64] +addpd xmm0,[xmm0] ; not a VSIB128 template +addpd xmm0,[ymm0] ; not a VSIB256 template + +[bits 32] +vpgatherdq xmm0,[bp+xmm0],xmm0 + +vpgatherdq xmm0,[xmm0+ymm0],xmm0 + +vpgatherqq ymm0,[word 
ymm0],ymm0 + +vpgatherqq ymm0,[byte ymm0],ymm0 + + diff --git a/modules/arch/x86/tests/vsib2-err.errwarn b/modules/arch/x86/tests/vsib2-err.errwarn new file mode 100644 index 00000000..877ce62e --- /dev/null +++ b/modules/arch/x86/tests/vsib2-err.errwarn @@ -0,0 +1,7 @@ +-:4: error: invalid effective address +-:7: error: invalid effective address +-:8: error: invalid effective address +-:11: error: invalid effective address +-:13: error: invalid effective address +-:15: error: invalid effective address +-:17: warning: invalid displacement size; fixed diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index 8e7caf82..c46ea5b8 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -177,6 +177,9 @@ int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3, typedef struct x86_effaddr { yasm_effaddr ea; /* base structure */ + /* VSIB uses the normal SIB byte, but this flag enables it. */ + unsigned char vsib_mode; /* 0 if not, 1 if XMM, 2 if YMM */ + /* How the spare (register) bits in Mod/RM are handled: * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) * They're set in bytecode_create_insn(). 
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c index 49df4f8f..8666943e 100644 --- a/modules/arch/x86/x86bc.c +++ b/modules/arch/x86/x86bc.c @@ -191,6 +191,7 @@ ea_create(void) x86_ea->ea.pc_rel = 0; x86_ea->ea.not_pc_rel = 0; x86_ea->ea.data_len = 0; + x86_ea->vsib_mode = 0; x86_ea->modrm = 0; x86_ea->valid_modrm = 0; x86_ea->need_modrm = 0; @@ -382,6 +383,8 @@ yasm_x86__ea_print(const yasm_effaddr *ea, FILE *f, int indent_level) fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit); fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", (unsigned int)x86_ea->ea.segreg); + fprintf(f, "%*sVSIBMode=%u\n", indent_level, "", + (unsigned int)x86_ea->vsib_mode); fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "", (unsigned int)x86_ea->modrm, (unsigned int)x86_ea->valid_modrm, (unsigned int)x86_ea->need_modrm); diff --git a/modules/arch/x86/x86expr.c b/modules/arch/x86/x86expr.c index 814a9b2a..cacf827a 100644 --- a/modules/arch/x86/x86expr.c +++ b/modules/arch/x86/x86expr.c @@ -34,6 +34,7 @@ typedef struct x86_checkea_reg3264_data { int *regs; /* total multiplier for each reg */ + unsigned char vsib_mode; unsigned char bits; unsigned char addrsize; } x86_checkea_reg3264_data; @@ -58,6 +59,20 @@ x86_expr_checkea_get_reg3264(yasm_expr__item *ei, int *regnum, return 0; *regnum = (unsigned int)(ei->data.reg & 0xF); break; + case X86_XMMREG: + if (data->vsib_mode != 1) + return 0; + if (data->bits != 64 && (ei->data.reg & 0x8) == 0x8) + return 0; + *regnum = 17+(unsigned int)(ei->data.reg & 0xF); + break; + case X86_YMMREG: + if (data->vsib_mode != 2) + return 0; + if (data->bits != 64 && (ei->data.reg & 0x8) == 0x8) + return 0; + *regnum = 17+(unsigned int)(ei->data.reg & 0xF); + break; case X86_RIP: if (data->bits != 64) return 0; @@ -606,6 +621,11 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, } /*@fallthrough@*/ default: + /* If SIB is required, but we're in 16-bit mode, set to 32. 
*/ + if (bits == 16 && x86_ea->need_sib == 1) { + *addrsize = 32; + break; + } /* check for use of 16 or 32-bit registers; if none are used * default to bits setting. */ @@ -643,13 +663,19 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, REG64_R13, REG64_R14, REG64_R15, - REG64_RIP + REG64_RIP, + SIMDREGS } reg3264type; - int reg3264mult[17] = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int reg3264mult[33] = + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; x86_checkea_reg3264_data reg3264_data; int basereg = REG3264_NONE; /* "base" register (for SIB) */ int indexreg = REG3264_NONE; /* "index" register (for SIB) */ + int regcount = 17; /* normally don't check SIMD regs */ + + if (x86_ea->vsib_mode != 0) + regcount = 33; /* We can only do 64-bit addresses in 64-bit mode. */ if (*addrsize == 64 && bits != 64) { @@ -665,6 +691,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, } reg3264_data.regs = reg3264mult; + reg3264_data.vsib_mode = x86_ea->vsib_mode; reg3264_data.bits = bits; reg3264_data.addrsize = *addrsize; if (x86_ea->ea.disp.abs) { @@ -698,7 +725,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, * Also, if an indexreg hasn't been assigned, try to find one. * Meanwhile, check to make sure there's no negative register mults. */ - for (i=0; i<17; i++) { + for (i=0; i<regcount; i++) { if (reg3264mult[i] < 0) { yasm_error_set(YASM_ERROR_VALUE, N_("invalid effective address")); @@ -711,10 +738,26 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, indexreg = i; } - /* Handle certain special cases of indexreg mults when basereg is - * empty. - */ - if (indexreg != REG3264_NONE && basereg == REG3264_NONE) + if (x86_ea->vsib_mode != 0) { + /* For VSIB, the SIMD register needs to go into the indexreg. + * Also check basereg (must be a GPR if present) and indexreg + * (must be a SIMD register). 
+ */ + if (basereg >= SIMDREGS && + (indexreg == REG3264_NONE || reg3264mult[indexreg] == 1)) { + int temp = basereg; + basereg = indexreg; + indexreg = temp; + } + if (basereg >= REG64_RIP || indexreg < SIMDREGS) { + yasm_error_set(YASM_ERROR_VALUE, + N_("invalid effective address")); + return 1; + } + } else if (indexreg != REG3264_NONE && basereg == REG3264_NONE) + /* Handle certain special cases of indexreg mults when basereg is + * empty. + */ switch (reg3264mult[indexreg]) { case 1: /* Only optimize this way if nosplit wasn't specified */ @@ -741,7 +784,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, /* Make sure there's no other registers than the basereg and indexreg * we just found. */ - for (i=0; i<17; i++) + for (i=0; i<regcount; i++) if (i != basereg && i != indexreg && reg3264mult[i] != 0) { yasm_error_set(YASM_ERROR_VALUE, N_("invalid effective address")); @@ -861,10 +904,17 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, x86_ea->sib |= 040; /* Any scale field is valid, just leave at 0. */ else { - if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int) - (X86_REG64 | indexreg), bits, - X86_REX_X)) - return 1; + if (indexreg >= SIMDREGS) { + if (yasm_x86__set_rex_from_reg(rex, &low3, + (unsigned int)(X86_XMMREG | (indexreg-SIMDREGS)), + bits, X86_REX_X)) + return 1; + } else { + if (yasm_x86__set_rex_from_reg(rex, &low3, + (unsigned int)(X86_REG64 | indexreg), + bits, X86_REX_X)) + return 1; + } x86_ea->sib |= low3 << 3; /* Set scale field, 1 case -> 0, so don't bother. 
*/ switch (reg3264mult[indexreg]) { diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c index c734d335..0e8a5be9 100644 --- a/modules/arch/x86/x86id.c +++ b/modules/arch/x86/x86id.c @@ -111,7 +111,11 @@ enum x86_operand_type { */ OPT_MemrAX = 25, /* EAX memory operand only (EA) [special case for SVM skinit opcode] */ - OPT_MemEAX = 26 + OPT_MemEAX = 26, + /* XMM VSIB memory operand */ + OPT_MemXMMIndex = 27, + /* YMM VSIB memory operand */ + OPT_MemYMMIndex = 28 }; enum x86_operand_size { @@ -347,6 +351,37 @@ static const yasm_bytecode_callback x86_id_insn_callback = { #include "x86insns.c" +/* Looks for the first SIMD register match for the purposes of VSIB matching. + * Full legality checking is performed in EA code. + */ +static int +x86_expr_contains_simd_cb(const yasm_expr__item *ei, void *d) +{ + int ymm = *((int *)d); + if (ei->type != YASM_EXPR_REG) + return 0; + switch ((x86_expritem_reg_size)(ei->data.reg & ~0xFUL)) { + case X86_XMMREG: + if (!ymm) + return 1; + break; + case X86_YMMREG: + if (ymm) + return 1; + break; + default: + break; + } + return 0; +} + +static int +x86_expr_contains_simd(const yasm_expr *e, int ymm) +{ + return yasm_expr__traverse_leaves_in_const(e, &ymm, + x86_expr_contains_simd_cb); +} + static void x86_finalize_common(x86_common *common, const x86_insn_info *info, unsigned int mode_bits) @@ -851,6 +886,16 @@ x86_find_match(x86_id_insn *id_insn, yasm_insn_operand **ops, mismatch = 1; break; } + case OPT_MemXMMIndex: + if (op->type != YASM_INSN__OPERAND_MEMORY || + !x86_expr_contains_simd(op->data.ea->disp.abs, 0)) + mismatch = 1; + break; + case OPT_MemYMMIndex: + if (op->type != YASM_INSN__OPERAND_MEMORY || + !x86_expr_contains_simd(op->data.ea->disp.abs, 1)) + mismatch = 1; + break; default: yasm_internal_error(N_("invalid operand type")); } @@ -1231,12 +1276,20 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) if (info_ops[i].type == OPT_MemOffs) /* Special-case for MOV MemOffs instruction */ 
yasm_x86__ea_set_disponly(insn->x86_ea); - else if (id_insn->default_rel && - !op->data.ea->not_pc_rel && - op->data.ea->segreg != 0x6404 && - op->data.ea->segreg != 0x6505 && - !yasm_expr__contains( - op->data.ea->disp.abs, YASM_EXPR_REG)) + else if (info_ops[i].type == OPT_MemXMMIndex) { + /* Remember VSIB mode */ + insn->x86_ea->vsib_mode = 1; + insn->x86_ea->need_sib = 1; + } else if (info_ops[i].type == OPT_MemYMMIndex) { + /* Remember VSIB mode */ + insn->x86_ea->vsib_mode = 2; + insn->x86_ea->need_sib = 1; + } else if (id_insn->default_rel && + !op->data.ea->not_pc_rel && + op->data.ea->segreg != 0x6404 && + op->data.ea->segreg != 0x6505 && + !yasm_expr__contains( + op->data.ea->disp.abs, YASM_EXPR_REG)) /* Enable default PC-rel if no regs and segreg * is not FS or GS. */ |